Creating the DATA SET

In a first step the financial data from Chinese firms was downloaded from Bloomberg and imported into R. It includes all firms with China as their country of domicile that were part of the following GICS subindustries: IT Consulting & Other Services (GICS 45102010), Data Processing & Outsourced Services (GICS 45102020), Internet Services & Infrastructure (GICS 45102030), Application Software (GICS 45103010), Systems Software (GICS 45103020), Communications Equipment (GICS 45201020), Technology Hardware, Storage & Peripherals (GICS 45202030), Electronic Equipment & Instruments (GICS 45203010), Electronic Components (GICS 45203015), Electronic Manufacturing Services (GICS 45203020), Technology Distributors (GICS 45203030), Semiconductor Equipment (GICS 45301010), Semiconductors (GICS 45301020), Internet & Direct Marketing Retail (GICS 25502020), Interactive Media & Services (GICS 50203010) and Movies & Entertainment (GICS 50202010).

The following information for 16 calendar quarters (Q1 2019 - Q4 2022) was collected: (1) GICS codes at the subindustry level, (2) average market cap, (3) revenue, (4) profits as measured by earnings before interest and taxes (EBIT), (5) currency and (6) the financial market ticker as a unique identifier.

The data sets were uploaded piece by piece due to size limits and then joined by a unique identifier (the ticker).

### Importing the Bloomberg exports ----
# NOTE(review): every read_excel() call below uses an absolute "~/..." path,
# so this setwd() only matters for relative paths used elsewhere in the script.
setwd("~/Desktop/Masterarbeit/Data/R_Master")

# Currency, exchange and GICS classification (one row per firm)
GICS_Cur_Exc <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/GICS_Currency_Exchange_onlyfirms.xlsx"
)

# Market capitalisation, delivered in two files (quarter labels 16-9 and 8-1)
MC1920 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/MarketCAP_qrt16,9_01012023_onlyfirms.xlsx"
)
MC2122 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/MarketCAP_qrt8,1_01012023_onlyfirms.xlsx"
)

# Revenue, same two-file layout
Rev1920 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/Revenue_qrt16,9_01012023_onlyfirms.xlsx"
)
Rev2122 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/Revenue_qrt8,1_01012023_onlyfirms.xlsx"
)

# EBIT (the export files are named IBIT_*), same two-file layout
IBIT1920 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/IBIT_qrt16,9_01012023_onlyfirms.xlsx"
)
IBIT2122 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/IBIT_qrt8,1_01012023_onlyfirms.xlsx"
)

### Joining the data sets one at a time ----
# Full joins on the ticker keep firms that are missing from any single export.
df2 <- GICS_Cur_Exc %>% full_join(MC1920, by = "Ticker")
df3 <- df2 %>% full_join(MC2122, by = "Ticker")
df4 <- df3 %>% full_join(Rev1920, by = "Ticker")
df5 <- df4 %>% full_join(Rev2122, by = "Ticker")
df6 <- df5 %>% full_join(IBIT1920, by = "Ticker")
dffull <- df6 %>% full_join(IBIT2122, by = "Ticker")

Converting data to USD

In a second step the spot exchange rates were added and all values were converted to USD. To achieve that, firms without a value for currency were first removed from the data set (two rows were excluded that lacked not only the currency but almost all necessary data, including names: 688496 CH Equity and 301379). Attached below is an overview of the missing financial information in the data set and the variable names used in the analysis.

# Remove firms for which no currency is recorded
dffull <- drop_na(dffull, Curncy)

# Reshape from wide to long and back to one column per measure:
# the value columns are named "<measure>:<quarter>", so the name is split at
# ":" into Variable and Quarter, and Variable is then spread into columns.
# The result has one row per firm and quarter.
df_long <- dffull %>%
  pivot_longer(
    cols = starts_with("Market Cap:") |
      starts_with("Revenue:") |
      starts_with("EBIT:"),
    names_to = c("Variable", "Quarter"),
    names_sep = ":"
  ) %>%
  pivot_wider(
    names_from = "Variable",
    values_from = "value"
  )


##### Attaching the official exchange rates by quarter

exch_rate <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/Exchange_rates_formated.xlsx"
)

# Left join keeps every firm-quarter row; rates are matched on Quarter only.
# "Market Cap" is renamed to Market_Cap so it can be used without backticks.
dflong_1 <- df_long %>%
  left_join(exch_rate, by = "Quarter") %>%
  dplyr::rename(Market_Cap = "Market Cap")



### Calculating USD values for Market_Cap, Revenue and EBIT
# Each measure is multiplied by the exchange-rate column that matches the
# firm's reporting currency; USD values are taken as they are and any other
# currency yields NA.
# NOTE(review): the rates are multiplied, so the Exch_* columns are presumably
# quoted as USD per unit of local currency - confirm against the rate sheet.
# The three Adj_* columns are appended in this order; later code depends on it.
df_adj <- dflong_1 %>%
  mutate(
    Adj_Market_Cap = case_when(
      Curncy == "USD" ~ Market_Cap,
      Curncy == "CNY" ~ Market_Cap * Exch_CNY,
      Curncy == "HKD" ~ Market_Cap * Exch_HKD,
      Curncy == "TWD" ~ Market_Cap * Exch_TWD,
      Curncy == "SGD" ~ Market_Cap * Exch_SGD,
      Curncy == "AUD" ~ Market_Cap * Exch_AUD,
      TRUE ~ NA_real_
    ),
    Adj_Revenue = case_when(
      Curncy == "USD" ~ Revenue,
      Curncy == "CNY" ~ Revenue * Exch_CNY,
      Curncy == "HKD" ~ Revenue * Exch_HKD,
      Curncy == "TWD" ~ Revenue * Exch_TWD,
      Curncy == "SGD" ~ Revenue * Exch_SGD,
      Curncy == "AUD" ~ Revenue * Exch_AUD,
      TRUE ~ NA_real_
    ),
    Adj_EBIT = case_when(
      Curncy == "USD" ~ EBIT,
      Curncy == "CNY" ~ EBIT * Exch_CNY,
      Curncy == "HKD" ~ EBIT * Exch_HKD,
      Curncy == "TWD" ~ EBIT * Exch_TWD,
      Curncy == "SGD" ~ EBIT * Exch_SGD,
      Curncy == "AUD" ~ EBIT * Exch_AUD,
      TRUE ~ NA_real_
    )
  )

# Re-transform the relevant data back to a wide format ----

# Keep the identifiers, the quarter label and the three USD-converted measures.
# The columns are selected by name (previously by position: 1:3, 6, 15:17) so
# that a change in the column layout of the joined files cannot silently pick
# the wrong variables.
df_sel <- df_adj %>%
  select(
    Ticker, Name, `GICS SubInd`, Quarter,
    Adj_Market_Cap, Adj_Revenue, Adj_EBIT
  )

# One row per firm, one column per measure and quarter
# (named <measure>_<quarter label>).
df_wide <- df_sel %>%
  pivot_wider(
    names_from = Quarter,
    values_from = c(Adj_Market_Cap, Adj_Revenue, Adj_EBIT)
  )

# Renaming the variables in preparation for the data analysis ----

# Original column names; every pattern below is matched against these.
var_names <- names(df_wide)

# Long prefix produced by pivot_wider() -> short prefix used in the analysis
short_prefix <- c(
  Adj_Market_Cap_Q = "MC_Q",
  Adj_Revenue_Q = "Rev_Q",
  Adj_EBIT_Q = "EBIT_Q"
)

for (long_prefix in names(short_prefix)) {
  hits <- grepl(long_prefix, var_names)
  if (any(hits)) {
    # The digits in the name are the quarter label; the numbering is reversed
    # (16 -> 1, ..., 1 -> 16).
    # NOTE(review): presumably the labels count backwards from the most recent
    # quarter, so that Q1 becomes the earliest one - confirm.
    label_no <- as.numeric(gsub("[^0-9]", "", var_names[hits]))
    names(df_wide)[hits] <- paste0(
      short_prefix[[long_prefix]],
      abs(label_no - 16) + 1
    )
  }
}

# Rename the sub-industry variable so it can be used without backticks
df_wide <- dplyr::rename(df_wide, GICS_SubInd = "GICS SubInd")

# Store the GICS sub-industry code as a character variable
df_wide$GICS_SubInd <- as.character(df_wide$GICS_SubInd)

# Check the new variable names
names(df_wide)
 [1] "Ticker"      "Name"        "GICS_SubInd" "MC_Q1"       "MC_Q2"      
 [6] "MC_Q3"       "MC_Q4"       "MC_Q5"       "MC_Q6"       "MC_Q7"      
[11] "MC_Q8"       "MC_Q9"       "MC_Q10"      "MC_Q11"      "MC_Q12"     
[16] "MC_Q13"      "MC_Q14"      "MC_Q15"      "MC_Q16"      "Rev_Q1"     
[21] "Rev_Q2"      "Rev_Q3"      "Rev_Q4"      "Rev_Q5"      "Rev_Q6"     
[26] "Rev_Q7"      "Rev_Q8"      "Rev_Q9"      "Rev_Q10"     "Rev_Q11"    
[31] "Rev_Q12"     "Rev_Q13"     "Rev_Q14"     "Rev_Q15"     "Rev_Q16"    
[36] "EBIT_Q1"     "EBIT_Q2"     "EBIT_Q3"     "EBIT_Q4"     "EBIT_Q5"    
[41] "EBIT_Q6"     "EBIT_Q7"     "EBIT_Q8"     "EBIT_Q9"     "EBIT_Q10"   
[46] "EBIT_Q11"    "EBIT_Q12"    "EBIT_Q13"    "EBIT_Q14"    "EBIT_Q15"   
[51] "EBIT_Q16"   
# Number of missing values in every column of the wide data set
na_counts <- vapply(df_wide, function(column) sum(is.na(column)), numeric(1))
na_counts 
     Ticker        Name GICS_SubInd       MC_Q1       MC_Q2       MC_Q3 
          0           0           0         445         434         423 
      MC_Q4       MC_Q5       MC_Q6       MC_Q7       MC_Q8       MC_Q9 
        396         365         347         321         264         239 
     MC_Q10      MC_Q11      MC_Q12      MC_Q13      MC_Q14      MC_Q15 
        206         177         158         131         105          79 
     MC_Q16      Rev_Q1      Rev_Q2      Rev_Q3      Rev_Q4      Rev_Q5 
         32         448         442         394         388         320 
     Rev_Q6      Rev_Q7      Rev_Q8      Rev_Q9     Rev_Q10     Rev_Q11 
        318         259         259         235         224         176 
    Rev_Q12     Rev_Q13     Rev_Q14     Rev_Q15     Rev_Q16     EBIT_Q1 
        168         165         168         130         119         454 
    EBIT_Q2     EBIT_Q3     EBIT_Q4     EBIT_Q5     EBIT_Q6     EBIT_Q7 
        445         402         394         327         319         265 
    EBIT_Q8     EBIT_Q9    EBIT_Q10    EBIT_Q11    EBIT_Q12    EBIT_Q13 
        264         243         233         181         175         172 
   EBIT_Q14    EBIT_Q15    EBIT_Q16 
        170         138         128 

Calculating Concentration Measures

In the next step the four concentration measures – HHI Market Cap, CR4 Market Cap, HHI Revenue and CR4 Revenue – were calculated for the 16 GICS subindustries, with the number of firms ranging from 8 to 177 per market. The graphs below provide a graphical illustration of the development of the different market concentration measures – HHI Revenue, CR4 Revenue, HHI Market Cap, CR4 Market Cap – over the 16 quarters. The dotted vertical line at quarter 8 represents the cutoff point that delineates the time before and after the new regulatory approach took effect. For the two revenue-based concentration measures it is difficult to detect any pattern around the cutoff. This is different for the concentration measures based on market capitalization. The CR4 MC graph shows a general decrease in market concentration during the treatment period, with the notable exception of the most concentrated markets. Meanwhile, the more comprehensive HHI MC measure shows a sharp decline shortly after the cutoff for the two most concentrated markets (GICS 50203010: Interactive Media & Services; GICS 25502020: Internet & Direct Marketing Retail), while no substantial changes can be observed for the large number of markets with low concentration.

### Calculating the HHI ----

# Alternative check of the number of groups: n_distinct(df_wide$GICS_SubInd)
### We have 16 different GICS subindustries; tabulate the number of firms in each
table(df_wide$GICS_SubInd)

25502020 45102010 45102020 45102030 45103010 45103020 45201020 45202030 
      35       86        8       22      129       39      114       48 
45203010 45203015 45203020 45203030 45301010 45301020 50202010 50203010 
     177      175       16       15       47      144       47       40 
#with the amount of firms ranging from 8 to 177 per sub-industry.

# Herfindahl-Hirschman index (HHI) of revenue per sub-industry and quarter ----
# For each quarter and sub-industry, every firm's share of the sub-industry's
# total revenue is squared and the squares are summed; firms with missing
# revenue are ignored. The value is stored in HHIRev_SubInd_Q<q> on every row
# that belongs to the sub-industry.

# All distinct GICS sub-industries
subindustries <- unique(df_wide$GICS_SubInd)

for (q in 1:16) {
  rev_col <- paste0("Rev_Q", q)
  col_name <- paste0("HHIRev_SubInd_Q", q)

  for (sub in subindustries) {
    # rows of the current sub-industry
    in_sub <- df_wide$GICS_SubInd == sub

    # revenue of these firms in quarter q
    revenue <- df_wide[[rev_col]][in_sub]

    # market shares relative to the total of the reported revenues
    market_share <- revenue / sum(revenue, na.rm = TRUE)

    # sum of squared shares, written back to all rows of the sub-industry
    hhi <- sum(market_share^2, na.rm = TRUE)
    df_wide[in_sub, col_name] <- hhi
  }
}

# Four-firm concentration ratio (CR4) of revenue per sub-industry and quarter ----
# For each quarter and sub-industry, the revenue shares of the four largest
# firms are summed; firms with missing revenue are ignored. The value is stored
# in CR4Rev_Subind_Q<q> on every row that belongs to the sub-industry.
# (The lower-case "ind" in the column prefix is kept on purpose: later code
# strips exactly this prefix.)
#
# The revenue is extracted with `[[` as a plain numeric vector. If df_wide is a
# tibble, `[ , 2]` stays a one-column data frame, which creates a nested
# data-frame column and makes order() complain that it cannot xtfrm data frames.

for (q in seq_len(16)) {
  rev_col <- paste0("Rev_Q", q)
  col_name <- paste0("CR4Rev_Subind_Q", q)

  for (sub in subindustries) {
    # rows of the current sub-industry
    in_sub <- df_wide$GICS_SubInd == sub

    # market shares relative to the total of the reported revenues
    revenue <- df_wide[[rev_col]][in_sub]
    market_share <- revenue / sum(revenue, na.rm = TRUE)

    # sort() drops missing shares, so only reported shares are ranked
    top_four <- head(sort(market_share, decreasing = TRUE), 4)

    # CR4 is 0 when no firm reports revenue, as before
    cr4 <- sum(top_four, na.rm = TRUE)
    df_wide[in_sub, col_name] <- cr4
  }
}


# HHI of market capitalisation per sub-industry and quarter ----
# Same construction as the revenue HHI, with market cap as the size measure:
# each firm's share of the sub-industry's total market cap is squared and the
# squares are summed; firms with missing market cap are ignored. The value is
# stored in HHIMC_SubInd_Q<q> on every row that belongs to the sub-industry.

for (q in 1:16) {
  mc_col <- paste0("MC_Q", q)
  col_name <- paste0("HHIMC_SubInd_Q", q)

  for (sub in subindustries) {
    # rows of the current sub-industry
    in_sub <- df_wide$GICS_SubInd == sub

    # market cap of these firms in quarter q
    market_cap <- df_wide[[mc_col]][in_sub]

    # shares relative to the total of the reported market caps
    MC_share <- market_cap / sum(market_cap, na.rm = TRUE)

    # sum of squared shares, written back to all rows of the sub-industry
    hhiMC <- sum(MC_share^2, na.rm = TRUE)
    df_wide[in_sub, col_name] <- hhiMC
  }
}

# CR4 of market capitalisation per sub-industry and quarter ----
# For each quarter and sub-industry, the market-cap shares of the four largest
# firms are summed; firms with missing market cap are ignored. The value is
# stored in CR4MC_Subind_Q<q> on every row that belongs to the sub-industry.
# (The lower-case "ind" in the column prefix is kept on purpose: later code
# strips exactly this prefix.)
#
# The market cap is extracted with `[[` as a plain numeric vector. If df_wide
# is a tibble, `[ , 2]` stays a one-column data frame, which creates a nested
# data-frame column and makes order() complain that it cannot xtfrm data frames.

for (q in seq_len(16)) {
  mc_col <- paste0("MC_Q", q)
  col_name <- paste0("CR4MC_Subind_Q", q)

  for (sub in subindustries) {
    # rows of the current sub-industry
    in_sub <- df_wide$GICS_SubInd == sub

    # shares relative to the total of the reported market caps
    market_cap <- df_wide[[mc_col]][in_sub]
    MC_share <- market_cap / sum(market_cap, na.rm = TRUE)

    # sort() drops missing shares, so only reported shares are ranked
    top_four <- head(sort(MC_share, decreasing = TRUE), 4)

    # CR4 is 0 when no firm reports a market cap, as before
    cr4_MC <- sum(top_four, na.rm = TRUE)
    df_wide[in_sub, col_name] <- cr4_MC
  }
}

# First we create the data frames for the concentration measures

# Step 1: HHI Revenue values per quarter per sub-industry

# The HHI is repeated on every firm row of a sub-industry, so keep the HHI
# columns and only the first row of each sub-industry.
df_HHI <- df_wide %>%
  select(GICS_SubInd, starts_with("HHIRev_SubInd_Q")) %>%
  filter(!duplicated(GICS_SubInd))

# Long format: one row per sub-industry and quarter
df_HHI_Rev <- melt(
  df_HHI,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "HHI"
)

# Strip the column prefix so that Quarter becomes the numeric quarter index
quarter_label <- gsub("HHIRev_SubInd_Q", "", df_HHI_Rev$Quarter)
df_HHI_Rev$Quarter <- as.numeric(quarter_label)

# One line per sub-industry; the dotted line marks quarter 8
ggplot(
  data = df_HHI_Rev,
  mapping = aes(x = Quarter, y = HHI, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(
    title = "HHI Revenue",
    x = "Quarter",
    y = "HHI",
    color = "GICS Subindustry"
  ) +
  theme_minimal()

# we can see some encouraging albeit small drop-offs for several 
#gics subindustries at Q8  which is our intended cut-off point for the RDD

### Step 2: CR4 Revenue values per quarter per sub-industry

# The CR4 columns are named "CR4Rev_Subind_Q<q>" (lower-case "ind"). The same
# prefix is now used for selecting and for stripping; the selection previously
# used "CR4Rev_SubInd_Q" and only matched because starts_with() ignores case
# by default.
cr4_rev_prefix <- "CR4Rev_Subind_Q"

# The CR4 is repeated on every firm row of a sub-industry, so keep the CR4
# columns and only the first row of each sub-industry.
df_CR4 <- df_wide %>%
  select(GICS_SubInd, starts_with(cr4_rev_prefix))
df_CR4 <- df_CR4[!duplicated(df_CR4$GICS_SubInd), ]

# Long format: one row per sub-industry and quarter
df_CR4 <- melt(
  df_CR4,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "CR4"
)

# Strip the column prefix so that Quarter becomes the numeric quarter index
df_CR4$Quarter <- as.numeric(sub(cr4_rev_prefix, "", df_CR4$Quarter))

# One line per sub-industry; the dotted line marks quarter 8
ggplot(df_CR4, aes(x = Quarter, y = CR4, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "CR4", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("CR4 Revenue") +
  theme_minimal()

### Step 3: HHI Market Cap values per quarter per sub-industry

# The HHI is repeated on every firm row of a sub-industry, so keep the HHI
# (Market Cap) columns and only the first row of each sub-industry.
df_HHIMC <- df_wide %>%
  select(GICS_SubInd, starts_with("HHIMC_SubInd_Q")) %>%
  filter(!duplicated(GICS_SubInd))

# Long format: one row per sub-industry and quarter
df_HHIMC <- melt(
  df_HHIMC,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "HHIMC"
)

# Strip the column prefix so that Quarter becomes the numeric quarter index
quarter_label <- sub("HHIMC_SubInd_Q", "", df_HHIMC$Quarter)
df_HHIMC$Quarter <- as.numeric(quarter_label)

# One line per sub-industry; the dotted line marks quarter 8
ggplot(
  data = df_HHIMC,
  mapping = aes(x = Quarter, y = HHIMC, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(
    title = "HHI Market Cap",
    x = "Quarter",
    y = "HHIMC",
    color = "GICS Subindustry"
  ) +
  theme_minimal()

##here we can see the sharp dropoffs after the regulatory approach for the 2 most concentrated markets

# Step 4: CR4 Market Cap values per quarter per sub-industry

# The CR4 columns are named "CR4MC_Subind_Q<q>" (lower-case "ind"). The same
# prefix is now used for selecting and for stripping; the selection previously
# used "CR4MC_SubInd_Q" and only matched because starts_with() ignores case
# by default.
cr4_mc_prefix <- "CR4MC_Subind_Q"

# The CR4 is repeated on every firm row of a sub-industry, so keep the CR4
# (Market Cap) columns and only the first row of each sub-industry.
df_CR4MC <- df_wide %>%
  select(GICS_SubInd, starts_with(cr4_mc_prefix))
df_CR4MC <- df_CR4MC[!duplicated(df_CR4MC$GICS_SubInd), ]

# Long format: one row per sub-industry and quarter
df_CR4MC <- melt(
  df_CR4MC,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "CR4MC"
)

# Strip the column prefix so that Quarter becomes the numeric quarter index
df_CR4MC$Quarter <- as.numeric(sub(cr4_mc_prefix, "", df_CR4MC$Quarter))

# One line per sub-industry; the dotted line marks quarter 8
ggplot(df_CR4MC, aes(x = Quarter, y = CR4MC, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "CR4MC", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("CR4 Market Cap") +
  theme_minimal()

Testing Hypothesis 1

The first method of testing hypothesis 1 was a regression discontinuity design (RDD). However, this did not produce significant results. Both revenue-based concentration measures show a small negative treatment effect (HHI Rev -0.005 and CR4 Rev -0.037) that is not significant (p value HHI Rev = 0.968, p value CR4 Rev = 0.810). For the MC concentration measures the regression discontinuity also shows a small negative treatment effect (HHI MC -0.009 and CR4 MC -0.008) with very high p values (p value HHI MC = 0.955, p value CR4 MC = 0.957). This can be attributed to the low number of observations at the market level: 16 GICS subindustries with 256 observation points in total are in the lower range of acceptable data quantity for RDDs.

### Testing Hypothesis 1: Change in regulatory approach has led to reduced market concentration.

# Method 1: regression discontinuity design (RDD)

# First, RDD on HHI Rev
# Treatment indicator: 1 from quarter 9 onwards, 0 before
df_HHI_Rev$treatment <- as.numeric(df_HHI_Rev$Quarter >= 9)

# Running, treatment and outcome variable
# (treat_var is not passed to rdrobust() below)
run_var <- df_HHI_Rev$Quarter
treat_var <- df_HHI_Rev$treatment
out_var <- df_HHI_Rev$HHI

# Cutoff of the running variable
# NOTE(review): the summary printed for this call reports 7 unique quarters to
# the left and 9 to the right of the cutoff, i.e. quarter 8 is counted on the
# treated side, whereas `treatment` above codes quarter 8 as untreated.
# Confirm which of the two is intended.
cutoff <- 8

# Local linear fit (p = 1), triangular kernel, MSE-optimal bandwidth
rddHHI_results <- rdrobust(
  y = out_var,
  x = run_var,
  c = cutoff,
  p = 1,
  kernel = "tri",
  bwselect = "mserd"
)

summary(rddHHI_results)
Sharp RD estimates using local polynomial regression.

Number of Obs.                  256
BW type                       mserd
Kernel                   Triangular
VCE method                       NN

Number of Obs.                  112          144
Eff. Number of Obs.              32           48
Order est. (p)                    1            1
Order bias  (q)                   2            2
BW est. (h)                   2.654        2.654
BW bias (b)                   4.608        4.608
rho (h/b)                     0.576        0.576
Unique Obs.                       7            9

=============================================================================
        Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
=============================================================================
  Conventional    -0.005     0.127    -0.040     0.968    [-0.254 , 0.243]     
        Robust         -         -    -0.055     0.956    [-0.353 , 0.333]     
=============================================================================
### no significant results, it is not even close.

# Second, RDD on CR4 Rev

# Treatment indicator: 1 from quarter 9 onwards, 0 before
df_CR4$treatment <- as.numeric(df_CR4$Quarter >= 9)

# Running, treatment and outcome variable
# (treat_var is not passed to rdrobust() below)
run_var <- df_CR4$Quarter
treat_var <- df_CR4$treatment
out_var <- df_CR4$CR4

# Cutoff of the running variable
# NOTE(review): the summary printed for this call reports 7 unique quarters to
# the left and 9 to the right of the cutoff, i.e. quarter 8 is counted on the
# treated side, whereas `treatment` above codes quarter 8 as untreated.
# Confirm which of the two is intended.
cutoff <- 8

# Local linear fit (p = 1), triangular kernel, MSE-optimal bandwidth
rddCR4_results <- rdrobust(
  y = out_var,
  x = run_var,
  c = cutoff,
  p = 1,
  kernel = "tri",
  bwselect = "mserd"
)

summary(rddCR4_results)
Sharp RD estimates using local polynomial regression.

Number of Obs.                  256
BW type                       mserd
Kernel                   Triangular
VCE method                       NN

Number of Obs.                  112          144
Eff. Number of Obs.              32           48
Order est. (p)                    1            1
Order bias  (q)                   2            2
BW est. (h)                   2.657        2.657
BW bias (b)                   4.614        4.614
rho (h/b)                     0.576        0.576
Unique Obs.                       7            9

=============================================================================
        Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
=============================================================================
  Conventional    -0.037     0.152    -0.240     0.810    [-0.335 , 0.262]     
        Robust         -         -    -0.249     0.803    [-0.465 , 0.360]     
=============================================================================
## again no significance

# Third, RDD on HHI MC
# Treatment indicator: 1 from quarter 9 onwards, 0 before
df_HHIMC$treatment <- as.numeric(df_HHIMC$Quarter >= 9)

# Running, treatment and outcome variable
# (treat_var is not passed to rdrobust() below)
run_var <- df_HHIMC$Quarter
treat_var <- df_HHIMC$treatment
out_var <- df_HHIMC$HHIMC

# Cutoff of the running variable
# NOTE(review): the summary printed for this call reports 7 unique quarters to
# the left and 9 to the right of the cutoff, i.e. quarter 8 is counted on the
# treated side, whereas `treatment` above codes quarter 8 as untreated.
# Confirm which of the two is intended.
cutoff <- 8

# Local linear fit (p = 1), triangular kernel, MSE-optimal bandwidth
rddHHIMC_results <- rdrobust(
  y = out_var,
  x = run_var,
  c = cutoff,
  p = 1,
  kernel = "tri",
  bwselect = "mserd"
)

summary(rddHHIMC_results)
Sharp RD estimates using local polynomial regression.

Number of Obs.                  256
BW type                       mserd
Kernel                   Triangular
VCE method                       NN

Number of Obs.                  112          144
Eff. Number of Obs.              32           48
Order est. (p)                    1            1
Order bias  (q)                   2            2
BW est. (h)                   2.690        2.690
BW bias (b)                   4.630        4.630
rho (h/b)                     0.581        0.581
Unique Obs.                       7            9

=============================================================================
        Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
=============================================================================
  Conventional    -0.009     0.158    -0.056     0.955    [-0.319 , 0.301]     
        Robust         -         -    -0.063     0.950    [-0.436 , 0.409]     
=============================================================================
# again a small negative effect, but not even close to being significant

# Fourth, RDD on CR4 MC

# Treatment indicator: 1 from quarter 9 onwards, 0 before
df_CR4MC$treatment <- as.numeric(df_CR4MC$Quarter >= 9)

## small negative effect (like the others) but also not significant

# Running, treatment and outcome variable
# (treat_var is not passed to rdrobust() below)
run_var <- df_CR4MC$Quarter
treat_var <- df_CR4MC$treatment
out_var <- df_CR4MC$CR4MC

# Cutoff of the running variable
# NOTE(review): the summary printed for this call reports 7 unique quarters to
# the left and 9 to the right of the cutoff, i.e. quarter 8 is counted on the
# treated side, whereas `treatment` above codes quarter 8 as untreated.
# Confirm which of the two is intended.
cutoff <- 8

# Local linear fit (p = 1), triangular kernel, MSE-optimal bandwidth
rddCR4MC_results <- rdrobust(
  y = out_var,
  x = run_var,
  c = cutoff,
  p = 1,
  kernel = "tri",
  bwselect = "mserd"
)

summary(rddCR4MC_results)
Sharp RD estimates using local polynomial regression.

Number of Obs.                  256
BW type                       mserd
Kernel                   Triangular
VCE method                       NN

Number of Obs.                  112          144
Eff. Number of Obs.              32           48
Order est. (p)                    1            1
Order bias  (q)                   2            2
BW est. (h)                   2.668        2.668
BW bias (b)                   4.624        4.624
rho (h/b)                     0.577        0.577
Unique Obs.                       7            9

=============================================================================
        Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
=============================================================================
  Conventional    -0.008     0.156    -0.054     0.957    [-0.314 , 0.297]     
        Robust         -         -    -0.038     0.970    [-0.428 , 0.412]     
=============================================================================
#also doesn't work

As a second method, a two-way fixed effects model (controlling for subindustry and quarter) was employed to test Hypothesis 1. However, only the CR4 market cap model shows a small but highly significant positive treatment effect (0.038440 with a p value of 0.00158). One can only speculate about the reasons for this result. But with the more comprehensive HHI measure not being significant and CR4 only considering the top 4 firms in a market, we cannot confirm the hypothesis that the new regulatory approach has reduced market concentration in China’s digital economy as a whole.

# Two-way fixed effect model for all GICS subindustries
# Each model regresses one concentration measure on the sub-industry, the
# quarter and the treatment indicator (1 from quarter 9 onwards).
# NOTE(review): Quarter was converted to numeric when the long data frames were
# built, so it enters as a single linear time trend (one "Quarter" coefficient
# in the table below), not as a set of quarter fixed effects.

# HHI Market Cap, using df_HHIMC

reg1 <- lm(HHIMC ~ GICS_SubInd + Quarter + treatment, data = df_HHIMC)


# no statistically significant result for treatment


# CR4 Market Cap, using df_CR4MC

reg2 <- lm(CR4MC ~ GICS_SubInd + Quarter + treatment, data = df_CR4MC)


#### significant result!
# But the sign is unexpected: the treatment effect should be negative, not positive.
# Maybe CR4 only takes the top 4 firms into account and they were not all hit equally,
# so concentration among the top 4 firms actually increased --> mere speculation


# HHI Revenue, using df_HHI_Rev

reg3 <- lm(HHI ~ GICS_SubInd + Quarter + treatment, data = df_HHI_Rev)


## no significant result


# CR4 Revenue, using df_CR4
reg4 <- lm(CR4 ~ GICS_SubInd + Quarter + treatment, data = df_CR4)

### as expected, no significance


# Print all four models side by side as a text table
stargazer(reg1, reg2, reg3, reg4, title=" Two-way fixed effect model Treatment effect",type = "text")

Two-way fixed effect model Treatment effect
==========================================================================
                                           Dependent variable:            
                               -------------------------------------------
                                 HHIMC      CR4MC       HHI        CR4    
                                  (1)        (2)        (3)        (4)    
--------------------------------------------------------------------------
GICS_SubInd45102010            -0.580***  -0.738***  -0.215***  -0.607*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45102020            -0.230***    -0.018    0.054***   0.104*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45102030            -0.481***  -0.362***   0.317***    0.013   
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45103010            -0.566***  -0.638***  -0.220***  -0.637*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45103020            -0.489***  -0.392***  -0.103***  -0.273*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45201020            -0.566***  -0.650***  -0.148***  -0.343*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45202030            -0.381***  -0.291***    0.010    -0.118*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45203010            -0.514***  -0.546***  -0.177***  -0.385*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45203015            -0.570***  -0.643***  -0.185***  -0.484*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45203020            -0.308***  -0.081***   0.320***   0.061*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45203030            -0.281***  -0.141***  -0.038***    -0.001  
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45301010            -0.524***  -0.488***  -0.118***  -0.298*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45301020            -0.516***  -0.502***  -0.199***  -0.516*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd50202010            -0.504***  -0.434***  -0.138***  -0.324*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd50203010             0.282***    0.025     0.500***   0.091*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
Quarter                        -0.005***  -0.008***    -0.001     0.001   
                                (0.001)    (0.001)    (0.001)    (0.001)  
                                                                          
treatment                        0.008     0.038***    0.0004     -0.013  
                                (0.013)    (0.012)    (0.008)    (0.008)  
                                                                          
Constant                        0.646***   1.010***   0.261***   0.888*** 
                                (0.015)    (0.014)    (0.009)    (0.010)  
                                                                          
--------------------------------------------------------------------------
Observations                      256        256        256        256    
R2                               0.955      0.967      0.978      0.983   
Adjusted R2                      0.952      0.965      0.977      0.982   
Residual Std. Error (df = 238)   0.052      0.048      0.033      0.033   
F Statistic (df = 17; 238)     296.239*** 409.123*** 636.382*** 823.882***
==========================================================================
Note:                                          *p<0.1; **p<0.05; ***p<0.01

In a second, subsequent analysis, a subset of China’s digital economy built from the primary markets of the BATs (Baidu, Alibaba and Tencent) was examined. Baidu and Tencent both belong to the Interactive Media & Services subindustry (GICS 50203010), while Alibaba belongs to the Internet & Direct Marketing Retail subindustry (GICS 25502020). A two-way fixed effects model yields a significant treatment effect for the HHI Market Cap model. Zooming in on the HHI MC model, we can see that, unsurprisingly, the GICS subindustry functions as a very strong predictor of the variance in HHI MC in a given market. Nevertheless, adding the treatment effect increased the adjusted R-squared from 0.781 to 0.829; the treatment thus represents a strong and significant predictor able to explain the decrease in market concentration following the new regulatory proposal.

# Method 3: estimate the treatment effect on the BATs' primary markets only

# GICS subindustry codes that make up the BAT subset used in this section
bat_subindustries <- c("50203010", "25502020")

# ---- HHI market cap ----
dfBAT1 <- subset(df_HHIMC, subset = GICS_SubInd %in% bat_subindustries)
reg5 <- lm(
  formula = HHIMC ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT1
)
# treatment effect is significant here

# Visual check: HHI (market cap) per subindustry over time, with a dotted
# marker at quarter 8
ggplot(
  data = dfBAT1,
  mapping = aes(
    x = Quarter,
    y = HHIMC,
    group = GICS_SubInd,
    color = GICS_SubInd
  )
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(
    title = "HHI Market Cap",
    x = "Quarter",
    y = "HHIMC",
    color = "GICS Subindustry"
  ) +
  theme_minimal()

# ---- CR4 market cap ----
dfBAT2 <- subset(df_CR4MC, subset = GICS_SubInd %in% bat_subindustries)
reg6 <- lm(
  formula = CR4MC ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT2
)
# treatment not significant

# ---- HHI revenue ----
dfBAT3 <- subset(df_HHI_Rev, subset = GICS_SubInd %in% bat_subindustries)
reg7 <- lm(
  formula = HHI ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT3
)
# treatment not significant

# ---- CR4 revenue ----
dfBAT4 <- subset(df_CR4, subset = GICS_SubInd %in% bat_subindustries)
reg8 <- lm(
  formula = CR4 ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT4
)
# treatment not significant either

# Side-by-side table of the four specifications
stargazer(
  reg5, reg6, reg7, reg8,
  title = "Two-way fixed effects model BATs",
  type = "text"
)

Two-way fixed effects model BATs
=======================================================================
                                         Dependent variable:           
                              -----------------------------------------
                                HHIMC     CR4MC      HHI        CR4    
                                 (1)       (2)       (3)        (4)    
-----------------------------------------------------------------------
GICS_SubInd50203010           0.282***  0.025***   0.500***   0.091*** 
                               (0.024)   (0.003)   (0.017)    (0.003)  
                                                                       
Quarter                        0.011**    0.001    -0.006*     0.001   
                               (0.005)   (0.001)   (0.004)    (0.001)  
                                                                       
treatment                     -0.143***  -0.004     0.018      0.002   
                               (0.047)   (0.006)   (0.033)    (0.006)  
                                                                       
Constant                      0.586***  0.958***   0.296***   0.883*** 
                               (0.031)   (0.004)   (0.022)    (0.004)  
                                                                       
-----------------------------------------------------------------------
Observations                     32        32         32         32    
R2                              0.845     0.757     0.971      0.974   
Adjusted R2                     0.829     0.731     0.967      0.972   
Residual Std. Error (df = 28)   0.067     0.008     0.047      0.008   
F Statistic (df = 3; 28)      50.973*** 29.059*** 307.130*** 355.499***
=======================================================================
Note:                                       *p<0.1; **p<0.05; ***p<0.01
# only HHI MC has a significant treatment effect

### Zooming in on the significant HHI market-cap model: the same outcome is
### regressed on a growing set of regressors so the contribution of each
### added term can be read off the table.

# (1) subindustry only
reg8a <- lm(formula = HHIMC ~ GICS_SubInd, data = dfBAT1)

# (2) subindustry + quarter
reg8b <- lm(formula = HHIMC ~ GICS_SubInd + Quarter, data = dfBAT1)

# (3) subindustry + quarter + treatment (same specification as reg5)
reg8c <- lm(
  formula = HHIMC ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT1
)

stargazer(
  reg8a, reg8b, reg8c,
  title = "Zoom in HHI MC Model",
  type = "text"
)

Zoom in HHI MC Model
=========================================================================================
                                             Dependent variable:                         
                    ---------------------------------------------------------------------
                                                    HHIMC                                
                              (1)                    (2)                    (3)          
-----------------------------------------------------------------------------------------
GICS_SubInd50203010        0.282***                0.282***               0.282***       
                            (0.027)                (0.027)                (0.024)        
                                                                                         
Quarter                                             -0.002                0.011**        
                                                   (0.003)                (0.005)        
                                                                                         
treatment                                                                -0.143***       
                                                                          (0.047)        
                                                                                         
Constant                   0.608***                0.629***               0.586***       
                            (0.019)                (0.031)                (0.031)        
                                                                                         
-----------------------------------------------------------------------------------------
Observations                  32                      32                     32          
R2                           0.790                  0.795                  0.845         
Adjusted R2                  0.783                  0.781                  0.829         
Residual Std. Error     0.075 (df = 30)        0.075 (df = 29)        0.067 (df = 28)    
F Statistic         112.722*** (df = 1; 30) 56.223*** (df = 2; 29) 50.973*** (df = 3; 28)
=========================================================================================
Note:                                                         *p<0.1; **p<0.05; ***p<0.01

In this chunk we do the robustness checks for the HHI MC model. The first plot indicates that the relationship captured by the regression model is linear. Just in case, I varied the Quarter variable and checked whether the treatment effect is still significant with Q^2, which it is. When testing for heteroscedasticity with the studentized Breusch-Pagan test, we did not have sufficient evidence to reject the null hypothesis of homoscedasticity. However, the p-value is relatively close to 0.05, indicating that there may be a possibility of heteroscedasticity. The subsequent plot indicates that the data may be somewhat heteroscedastic in the higher range of the fitted values. Therefore, as an additional robustness check, I reran the regression with the logged dependent variable, and the treatment effect remains significant. Further, the result of another studentized Breusch-Pagan test for the logged dependent variable suggests that there is no significant evidence of heteroscedasticity in the logged model. Lastly, the plots show that the residuals are approximately normally distributed. A gap in the middle of the last plot indicates that there is a significant difference in the dependent variable between the treated and untreated groups, and that this effect of the regulatory approach is not captured by the other variables in the model. In conclusion, the robustness checks confirm the validity of the treatment effect. While the model may not be perfectly linear, it passes all tests and the treatment effect persists in the robust models.

# Store the fitted values of the full HHI MC model next to the observed data
dfBAT1[["predicted"]] <- predict(object = reg8c)

# Observed (black) vs. model-predicted (red) HHIMC per subindustry and quarter
ggplot(
  data = dfBAT1,
  mapping = aes(x = Quarter, y = HHIMC, group = GICS_SubInd)
) +
  geom_line(mapping = aes(color = "Actual")) +
  geom_line(mapping = aes(y = predicted, color = "Predicted")) +
  labs(
    title = "Regression Model Performance",
    x = "Quarter",
    y = "HHIMC"
  ) +
  scale_color_manual(values = c("Actual" = "black", "Predicted" = "red"))

#### Linearity ####
# Diagnostic plot 1 of plot.lm(): residuals against fitted values
plot(reg5, which = 1)

# no obvious pattern -> looks fine

# Additional robustness check: does the treatment effect hold when Quarter
# enters the model quadratically instead of linearly?
#
# Inside a model formula `^` is the term-crossing operator, not arithmetic
# exponentiation, so a bare `Quarter^2` reduces to plain `Quarter` and the
# model is identical to reg5. Wrapping the term in I() makes R actually
# square the variable before fitting.
# NOTE(review): output pasted below that was produced with the un-wrapped
# `Quarter^2` term reproduces reg5 and must be regenerated after this fix;
# consider `Quarter + I(Quarter^2)` if a full second-order trend is wanted.
r1 <- lm(HHIMC ~ GICS_SubInd + I(Quarter^2) + treatment, data = dfBAT1)
summary(r1)

Call:
lm(formula = HHIMC ~ GICS_SubInd + Quarter^2 + treatment, data = dfBAT1)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.12754 -0.03566  0.01853  0.03991  0.15055 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)          0.58575    0.03084  18.991  < 2e-16 ***
GICS_SubInd50203010  0.28159    0.02356  11.954 1.63e-12 ***
Quarter              0.01098    0.00514   2.136  0.04159 *  
treatment           -0.14291    0.04739  -3.015  0.00541 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.06663 on 28 degrees of freedom
Multiple R-squared:  0.8452,    Adjusted R-squared:  0.8287 
F-statistic: 50.97 on 3 and 28 DF,  p-value: 1.813e-11
#nothing changes, very good

#### Homoscedasticity ####

# Studentized Breusch-Pagan test on the main HHI MC model; keep the result
# object around and show it
bp_test <- bptest(reg5)
print(bp_test)

    studentized Breusch-Pagan test

data:  reg5
BP = 7.5308, df = 3, p-value = 0.05677
#The studentized Breusch-Pagan test tests for heteroscedasticity in the errors of a linear regression model. The null hypothesis is that the errors are homoscedastic, while the alternative hypothesis is that they are heteroscedastic.At the 0.05 significance level, we do not have sufficient evidence to reject the null hypothesis of homoscedasticity. However, the p-value is relatively close to 0.05, indicating that there may be some evidence of heteroscedasticity. 

# Diagnostic plot 3 of plot.lm(): scale-location
plot(reg5, which = 3)

# The slight diagonal drop at the upper end of the fitted values is a little
# worrying: it hints that the residual variance is not constant there, i.e.
# possible heteroscedasticity in the higher range.

# Robustness check: refit the same specification with a logged outcome
r2 <- lm(
  formula = log(HHIMC) ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT1
)
summary(r2)

Call:
lm(formula = log(HHIMC) ~ GICS_SubInd + Quarter + treatment, 
    data = dfBAT1)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.21058 -0.04584  0.01383  0.04897  0.18958 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         -0.560958   0.042618 -13.162 1.63e-13 ***
GICS_SubInd50203010  0.381510   0.032550  11.721 2.59e-12 ***
Quarter              0.018953   0.007103   2.668  0.01254 *  
treatment           -0.207275   0.065487  -3.165  0.00372 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09207 on 28 degrees of freedom
Multiple R-squared:  0.8404,    Adjusted R-squared:  0.8233 
F-statistic: 49.14 on 3 and 28 DF,  p-value: 2.786e-11
# treatment stays significant in the logged model

# Studentized Breusch-Pagan test on the logged specification
bp_test_log <- bptest(r2)
print(bp_test_log)

    studentized Breusch-Pagan test

data:  r2
BP = 5.8729, df = 3, p-value = 0.118
#The result of the studentized Breusch-Pagan test for the logged dependent variable suggests that there is no significant evidence of heteroscedasticity in the model.
# Furter, even in the logged model the treatment effect was still significant and it is expected that our results are not perfectly linear.

#### Normality of residuals ####
# Diagnostic plot 2 of plot.lm(): normal Q-Q plot of the residuals
plot(reg5, which = 2)

## points stay close to the diagonal, consistent with normal residuals

# Raw residuals against fitted values
plot(x = reg5$fitted.values, y = reg5$residuals)

# The scatter looks fairly random: no visible pattern in the errors, which
# is in line with the linear-model assumptions of uncorrelated residuals
# with constant variance.
# The gap in the middle is read here as a clear difference in the dependent
# variable between treated and untreated observations that the other
# regressors do not capture.
# NOTE(review): a gap in the fitted values could also just reflect the level
# difference between the two subindustries - confirm before citing.

# overall: the model looks robust

As a small extension, I also check whether the treatment effect persists when we include an additional GICS subindustry. The data also contain a comparatively small, publicly traded Tencent subsidiary called Tencent Music Entertainment, which is part of the Movies & Entertainment subindustry (GICS 50202010). For clarity (and consistency) it was excluded from the main analysis, because none of the other numerous subsidiaries of the BATs have their own listing. As we can see, the treatment effect holds (again only for the HHI MC model).

# Extension: BAT primary markets plus Tencent Music's subindustry
# (50203010, 50202010, 25502020)
bat_tme_subindustries <- c("50203010", "50202010", "25502020")

# ---- HHI market cap ----
dfBAT5 <- subset(df_HHIMC, subset = GICS_SubInd %in% bat_tme_subindustries)

# Plot first: HHI (market cap) per subindustry, dotted marker at quarter 8
ggplot(
  data = dfBAT5,
  mapping = aes(
    x = Quarter,
    y = HHIMC,
    group = GICS_SubInd,
    color = GICS_SubInd
  )
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(x = "Quarter", y = "HHIMC", color = "GICS Subindustry") +
  theme_minimal()

reg9a <- lm(
  formula = HHIMC ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT5
)
# treatment significant again

# ---- CR4 market cap ----
dfBAT6 <- subset(df_CR4MC, subset = GICS_SubInd %in% bat_tme_subindustries)
reg9b <- lm(
  formula = CR4MC ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT6
)
# treatment not significant

# ---- HHI revenue ----
dfBAT7 <- subset(df_HHI_Rev, subset = GICS_SubInd %in% bat_tme_subindustries)
reg9c <- lm(
  formula = HHI ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT7
)
# treatment not significant

# ---- CR4 revenue ----
dfBAT8 <- subset(df_CR4, subset = GICS_SubInd %in% bat_tme_subindustries)
reg9d <- lm(
  formula = CR4 ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT8
)
# treatment not significant at the 5% level (only marginal, p < 0.1, in the
# table below)

# Side-by-side table of the four specifications
stargazer(
  reg9a, reg9b, reg9c, reg9d,
  title = "BAT Model Including Tencent Music",
  type = "text"
)

BAT Model Including Tencent Music
=========================================================================
                                          Dependent variable:            
                              -------------------------------------------
                                HHIMC      CR4MC       HHI        CR4    
                                 (1)        (2)        (3)        (4)    
-------------------------------------------------------------------------
GICS_SubInd50202010           -0.504***  -0.434***  -0.138***  -0.324*** 
                               (0.021)    (0.011)    (0.015)    (0.009)  
                                                                         
GICS_SubInd50203010            0.282***   0.025**    0.500***   0.091*** 
                               (0.021)    (0.011)    (0.015)    (0.009)  
                                                                         
Quarter                         0.007*     0.0002     -0.002    0.004*** 
                               (0.004)    (0.002)    (0.003)    (0.002)  
                                                                         
treatment                      -0.090**    0.003      0.001     -0.026*  
                               (0.035)    (0.018)    (0.025)    (0.015)  
                                                                         
Constant                       0.591***   0.959***   0.272***   0.865*** 
                               (0.024)    (0.013)    (0.017)    (0.010)  
                                                                         
-------------------------------------------------------------------------
Observations                      48         48         48         48    
R2                              0.971      0.980      0.979      0.983   
Adjusted R2                     0.968      0.978      0.977      0.981   
Residual Std. Error (df = 43)   0.060      0.032      0.042      0.025   
F Statistic (df = 4; 43)      357.124*** 530.292*** 502.808*** 604.128***
=========================================================================
Note:                                         *p<0.1; **p<0.05; ***p<0.01

Testing Hypothesis 2: In the digital economy, market concentration does not serve as a good predictor of profits.

The following models explore whether market concentration can function as a predictor of profits in the digital economy by looking both at total profits and at profit margins. Looking at total profits by firm first, all concentration measures – HHI Rev, CR4 Rev, HHI MC, CR4 MC – have a positive and statistically significant relationship with EBIT. The crucial problem for the concentration measures as predictors is that the R-squared values in the regression outputs are very low (around 1 percent), indicating that the market concentration measures included in the model do not have a strong relationship with firms’ profits and that other factors play a more important role. The low explanatory value holds true for a time-fixed model (controlling for Quarter) and a two-way fixed effects model (additionally controlling for GICS subindustry).

### Testing hypothesis 2: Lower market concentration is correlated with a reduction in firm's profits

# Build a firm-quarter panel (EBIT_merg) holding EBIT together with the four
# subindustry concentration measures.

# Columns are picked by position from the wide data frame
df_EBIT <- select(df_wide, 2, 3, 36:115)

# Each wide block of per-quarter columns is stacked into long format, reduced
# to the key columns plus the value, and the quarter index is recovered from
# the column name by stripping its prefix.

# EBIT
df_EBIT_long <- df_EBIT %>%
  pivot_longer(
    cols = starts_with("EBIT_Q"),
    names_to = "Quarter",
    values_to = "EBIT"
  ) %>%
  select(Name, GICS_SubInd, Quarter, EBIT) %>%
  mutate(Quarter = as.numeric(sub("EBIT_Q", "", Quarter)))

# HHI revenue
df_HHIRev_long <- df_EBIT %>%
  pivot_longer(
    cols = starts_with("HHIRev_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIRev"
  ) %>%
  select(Name, GICS_SubInd, Quarter, HHIRev) %>%
  mutate(Quarter = as.numeric(sub("HHIRev_SubInd_Q", "", Quarter)))

# CR4 revenue
df_CR4Rev_long <- df_EBIT %>%
  pivot_longer(
    cols = starts_with("CR4Rev_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4Rev"
  ) %>%
  select(Name, GICS_SubInd, Quarter, CR4Rev) %>%
  mutate(Quarter = as.numeric(sub("CR4Rev_Subind_Q", "", Quarter)))

# HHI market cap
df_HHIMC_long <- df_EBIT %>%
  pivot_longer(
    cols = starts_with("HHIMC_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIMC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, HHIMC) %>%
  mutate(Quarter = as.numeric(sub("HHIMC_SubInd_Q", "", Quarter)))

# CR4 market cap
df_CR4MC_long <- df_EBIT %>%
  pivot_longer(
    cols = starts_with("CR4MC_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4MC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, CR4MC) %>%
  mutate(Quarter = as.numeric(sub("CR4MC_Subind_Q", "", Quarter)))

# Merge: starting from the EBIT table, left-join each concentration measure
# in turn on firm, subindustry and quarter
EBIT_merg <- Reduce(
  function(acc, measure) {
    left_join(acc, measure, by = c("Name", "GICS_SubInd", "Quarter"))
  },
  list(
    df_EBIT_long,
    df_HHIRev_long,
    df_CR4Rev_long,
    df_HHIMC_long,
    df_CR4MC_long
  )
)




### Naive models: EBIT regressed on one concentration measure at a time

# HHI revenue
reg10a <- lm(formula = EBIT ~ HHIRev, data = EBIT_merg)
# HHI is highly significant. Moving from 0 to 1 (a 1-10000 scale might read
# better) corresponds to about 9.97 billion more profit in the hypothetical
# case of a fully concentrated market, but the explanatory value is very
# low: R-squared is around 1 percent.

# CR4 revenue
reg10b <- lm(formula = EBIT ~ CR4Rev, data = EBIT_merg)
# same picture: higher CR4 goes with higher profits; R-squared is even lower
# (below 1 percent)

# HHI market cap
reg10c <- lm(formula = EBIT ~ HHIMC, data = EBIT_merg)
# same picture, with a slightly higher R-squared

# CR4 market cap
reg10d <- lm(formula = EBIT ~ CR4MC, data = EBIT_merg)
# same picture

stargazer(
  reg10a, reg10b, reg10c, reg10d,
  title = "Naive Total Profits Regression Model",
  type = "text"
)

Naive Total Profits Regression Model
======================================================================================================================
                                                                  Dependent variable:                                 
                                 -------------------------------------------------------------------------------------
                                                                         EBIT                                         
                                         (1)                   (2)                  (3)                   (4)         
----------------------------------------------------------------------------------------------------------------------
HHIRev                           9,969,480,093.000***                                                                 
                                  (707,071,810.000)                                                                   
                                                                                                                      
CR4Rev                                                5,048,708,631.000***                                            
                                                        (551,009,734.000)                                             
                                                                                                                      
HHIMC                                                                       9,268,733,745.000***                      
                                                                             (635,073,963.000)                        
                                                                                                                      
CR4MC                                                                                            6,219,981,258.000*** 
                                                                                                   (571,910,343.000)  
                                                                                                                      
Constant                         -359,884,861.000***  -1,636,945,457.000***   -197,050,969.000   -1,838,777,713.000***
                                  (138,736,441.000)     (295,082,969.000)    (130,188,360.000)     (272,172,156.000)  
                                                                                                                      
----------------------------------------------------------------------------------------------------------------------
Observations                            13,962               13,962                13,962               13,962        
R2                                      0.014                 0.006                0.015                 0.008        
Adjusted R2                             0.014                 0.006                0.015                 0.008        
Residual Std. Error (df = 13960)  12,678,529,762.000   12,730,264,525.000    12,672,174,394.000   12,714,734,150.000  
F Statistic (df = 1; 13960)           198.801***            83.954***            213.006***           118.283***      
======================================================================================================================
Note:                                                                                      *p<0.1; **p<0.05; ***p<0.01
# all models with very low explanatory power


# Does the naive result hold once Quarter is controlled for?
# NOTE(review): Quarter is numeric in EBIT_merg, so it enters these models as
# a single linear trend rather than as one dummy per period - confirm that
# this is what "time fixed effects" is meant to denote here.

# HHI revenue
reg11a <- lm(formula = EBIT ~ HHIRev + Quarter, data = EBIT_merg)
# concentration still has a positive effect on profits; explanatory power
# stays low

# CR4 revenue
reg11b <- lm(formula = EBIT ~ CR4Rev + Quarter, data = EBIT_merg)

# HHI market cap
reg11c <- lm(formula = EBIT ~ HHIMC + Quarter, data = EBIT_merg)
# same picture, with a slightly higher R-squared

# CR4 market cap
reg11d <- lm(formula = EBIT ~ CR4MC + Quarter, data = EBIT_merg)

stargazer(
  reg11a, reg11b, reg11c, reg11d,
  title = "Time-fixed regression model total profits",
  type = "text"
)

Time-fixed regression model total profits
======================================================================================================================
                                                                  Dependent variable:                                 
                                 -------------------------------------------------------------------------------------
                                                                         EBIT                                         
                                         (1)                   (2)                  (3)                   (4)         
----------------------------------------------------------------------------------------------------------------------
HHIRev                           9,972,829,773.000***                                                                 
                                  (707,073,846.000)                                                                   
                                                                                                                      
CR4Rev                                                5,043,189,030.000***                                            
                                                        (551,045,076.000)                                             
                                                                                                                      
HHIMC                                                                       9,292,806,672.000***                      
                                                                             (635,277,881.000)                        
                                                                                                                      
CR4MC                                                                                            6,260,620,865.000*** 
                                                                                                   (572,499,263.000)  
                                                                                                                      
Quarter                             25,734,391.000       21,924,552.000        33,525,476.000       36,326,300.000    
                                   (23,648,451.000)     (23,746,403.000)      (23,644,118.000)     (23,740,334.000)   
                                                                                                                      
Constant                          -593,145,950.000**  -1,832,566,667.000***  -503,186,769.000**  -2,185,222,297.000***
                                  (255,333,272.000)     (363,272,053.000)    (252,116,368.000)     (354,023,950.000)  
                                                                                                                      
----------------------------------------------------------------------------------------------------------------------
Observations                            13,962               13,962                13,962               13,962        
R2                                      0.014                 0.006                0.015                 0.009        
Adjusted R2                             0.014                 0.006                0.015                 0.008        
Residual Std. Error (df = 13959)  12,678,446,120.000   12,730,331,805.000    12,671,715,779.000   12,714,123,339.000  
F Statistic (df = 2; 13959)           99.994***             42.403***            107.516***            60.318***      
======================================================================================================================
Note:                                                                                      *p<0.1; **p<0.05; ***p<0.01
# Two-way specification: each concentration measure with Quarter and the
# GICS subindustry as additional controls

# HHI revenue
reg12a <- lm(
  formula = EBIT ~ HHIRev + Quarter + GICS_SubInd,
  data = EBIT_merg
)
# NOTE(review): an earlier note here said the effect on profits is still
# positive; the HHIRev estimate in the table below is negative and
# insignificant - re-check. Explanatory power remains low.

# CR4 revenue
reg12b <- lm(
  formula = EBIT ~ CR4Rev + Quarter + GICS_SubInd,
  data = EBIT_merg
)

# HHI market cap
reg12c <- lm(
  formula = EBIT ~ HHIMC + Quarter + GICS_SubInd,
  data = EBIT_merg
)
# slightly higher R-squared

# CR4 market cap
reg12d <- lm(
  formula = EBIT ~ CR4MC + Quarter + GICS_SubInd,
  data = EBIT_merg
)

stargazer(
  reg12a, reg12b, reg12c, reg12d,
  title = "Two-way fixed effects model total profits ",
  type = "text"
)

Two-way fixed effects model total profits
========================================================================================================================
                                                                   Dependent variable:                                  
                                 ---------------------------------------------------------------------------------------
                                                                          EBIT                                          
                                          (1)                   (2)                   (3)                   (4)         
------------------------------------------------------------------------------------------------------------------------
HHIRev                            -2,415,021,042.000                                                                    
                                  (5,205,525,905.000)                                                                   
                                                                                                                        
CR4Rev                                                    479,461,543.000                                               
                                                        (3,354,860,381.000)                                             
                                                                                                                        
HHIMC                                                                         -2,164,839,308.000                        
                                                                              (3,606,344,556.000)                       
                                                                                                                        
CR4MC                                                                                                1,123,112,775.000  
                                                                                                    (2,460,722,039.000) 
                                                                                                                        
Quarter                             19,545,723.000        19,681,242.000        16,706,081.000        22,771,100.000    
                                   (23,578,854.000)      (23,633,794.000)      (24,171,825.000)      (24,365,414.000)   
                                                                                                                        
GICS_SubInd45102010                -584,207,090.000       234,849,696.000     -1,333,106,249.000      774,717,364.000   
                                  (1,396,142,449.000)   (2,193,394,789.000)   (2,275,545,105.000)   (1,992,978,141.000) 
                                                                                                                        
GICS_SubInd45102020                 445,667,812.000       270,969,761.000      -212,284,361.000       345,196,024.000   
                                  (1,682,256,170.000)   (1,694,463,397.000)   (1,881,667,415.000)   (1,661,075,666.000) 
                                                                                                                        
GICS_SubInd45102030                 830,633,091.000       73,448,698.000       -994,179,674.000       500,234,896.000   
                                  (1,905,718,755.000)   (1,000,052,745.000)   (2,046,611,156.000)   (1,362,144,641.000) 
                                                                                                                        
GICS_SubInd45103010                -516,036,290.000       330,383,856.000     -1,219,756,582.000      741,752,135.000   
                                  (1,405,719,770.000)   (2,283,334,952.000)   (2,216,793,486.000)   (1,758,674,162.000) 
                                                                                                                        
GICS_SubInd45103020                 -71,331,687.000       326,570,193.000      -895,621,486.000       644,020,358.000   
                                  (1,072,201,244.000)   (1,305,376,996.000)   (2,027,930,694.000)   (1,343,090,152.000) 
                                                                                                                        
GICS_SubInd45201020                -199,396,974.000       330,622,259.000     -1,079,324,794.000      897,733,036.000   
                                  (1,112,013,145.000)   (1,392,640,320.000)   (2,218,257,495.000)   (1,784,544,673.000) 
                                                                                                                        
GICS_SubInd45202030                1,330,985,604.000     1,374,983,230.000      475,097,003.000      1,644,830,644.000  
                                   (886,989,526.000)     (974,130,543.000)    (1,659,569,957.000)   (1,140,580,134.000) 
                                                                                                                        
GICS_SubInd45203010                -112,234,750.000       511,427,915.000      -810,336,196.000       944,438,045.000   
                                  (1,211,160,423.000)   (1,511,970,088.000)   (2,038,437,000.000)   (1,556,662,672.000) 
                                                                                                                        
GICS_SubInd45203015                 227,389,032.000       910,096,306.000      -574,338,857.000      1,402,359,536.000  
                                  (1,237,435,370.000)   (1,790,345,210.000)   (2,223,123,959.000)   (1,760,213,759.000) 
                                                                                                                        
GICS_SubInd45203020              5,854,035,096.000***  5,068,727,043.000***  4,400,640,541.000***  5,192,967,555.000*** 
                                  (2,048,326,751.000)   (1,254,639,632.000)   (1,697,313,767.000)   (1,256,351,458.000) 
                                                                                                                        
GICS_SubInd45203030                 691,185,744.000       788,799,253.000       147,655,019.000       954,767,204.000   
                                  (1,170,402,791.000)   (1,151,515,483.000)   (1,570,090,571.000)   (1,207,838,227.000) 
                                                                                                                        
GICS_SubInd45301010                 650,211,316.000      1,102,632,896.000     -206,563,640.000      1,522,265,119.000  
                                  (1,124,907,413.000)   (1,381,717,869.000)   (2,139,246,517.000)   (1,543,523,371.000) 
                                                                                                                        
GICS_SubInd45301020                 431,306,471.000      1,168,406,030.000     -212,945,021.000      1,483,701,820.000  
                                  (1,314,614,944.000)   (1,902,907,062.000)   (2,044,947,089.000)   (1,462,038,318.000) 
                                                                                                                        
GICS_SubInd50202010                -955,436,175.000      -459,636,869.000     -1,726,395,724.000     -124,318,990.000   
                                  (1,168,272,627.000)   (1,416,705,509.000)   (2,062,622,377.000)   (1,407,726,416.000) 
                                                                                                                        
GICS_SubInd50203010              12,818,297,784.000*** 11,599,444,560.000*** 12,214,504,712.000*** 11,615,368,924.000***
                                  (2,706,713,162.000)    (995,451,703.000)    (1,345,199,626.000)    (951,024,910.000)  
                                                                                                                        
Constant                            547,355,879.000      -497,109,213.000      1,291,821,575.000    -1,181,522,162.000  
                                  (1,532,860,892.000)   (3,064,500,265.000)   (2,393,762,998.000)   (2,543,329,396.000) 
                                                                                                                        
------------------------------------------------------------------------------------------------------------------------
Observations                            13,962                13,962                13,962                13,962        
R2                                       0.025                 0.025                 0.025                 0.025        
Adjusted R2                              0.023                 0.023                 0.023                 0.023        
Residual Std. Error (df = 13944)  12,617,378,083.000    12,617,466,220.000    12,617,312,433.000    12,617,381,213.000  
F Statistic (df = 17; 13944)           20.728***             20.716***             20.737***             20.728***      
========================================================================================================================
Note:                                                                                        *p<0.1; **p<0.05; ***p<0.01

Next we look at profit margins, which are calculated from EBIT and revenue. A few observations with revenue equal to 0 or NA were excluded because the profit margin would otherwise be infinite or undefined. Running the same regression models on profit margins once more yields a statistically significant relationship between the concentration measures and the dependent variable. However, this time it is negative. Nevertheless, the models again have very low R-squared values, which also holds true for the time-fixed model and the two-way fixed effects model. All in all, market concentration explains only around 0.1 percent of the variation in profit margins.

# calculating profit margins

## Converting revenue values to long format: the quarterly Rev_Q* columns of
## df_wide become one row per firm and quarter, with the value stored in REV.
## starts_with() ignores case by default, but the prefix is spelled exactly
## like the column names ("Rev_Q") so the selector and the sub() pattern below
## agree and the code does not depend on that default.
df_REV_long <- df_wide %>%
  pivot_longer(
    cols = starts_with("Rev_Q"),
    names_to = "Quarter",
    values_to = "REV"
  ) %>%
  select(Name, GICS_SubInd, Quarter, REV)

# Converting quarter to numeric: strip the leading "Rev_Q" (anchored, so only
# the prefix is removed) and keep the quarter index.
df_REV_long$Quarter <- as.numeric(sub("^Rev_Q", "", df_REV_long$Quarter))

# Assemble the profit-margin table in one pipeline:
#   1. attach revenue to the long EBIT table,
#   2. compute Profitmarg = EBIT / REV in percent, rounded to 2 decimals
#      (NA whenever EBIT or REV is missing),
#   3. attach the four concentration measures.
# Every join uses the same key: Name, GICS_SubInd, Quarter.
df_Profitmarg <- df_EBIT_long %>%
  left_join(df_REV_long, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  mutate(
    Profitmarg = ifelse(
      is.na(REV) | is.na(EBIT),
      NA,
      round((EBIT / REV) * 100, 2)
    )
  ) %>%
  left_join(df_HHIRev_long, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(df_CR4Rev_long, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(df_HHIMC_long, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(df_CR4MC_long, by = c("Name", "GICS_SubInd", "Quarter"))

# A revenue of 0 turns the margin into +Inf or -Inf. Those observations have
# to be removed before running a regression; rows with a missing margin are
# kept here.
df_Profitmarg <- df_Profitmarg[!is.infinite(df_Profitmarg$Profitmarg), ]


# Naive model: profit margin explained by the revenue-based HHI alone;
# observations with missing values are dropped by na.omit.
reg13a <- lm(
  Profitmarg ~ HHIRev,
  data = df_Profitmarg,
  na.action = na.omit
)
summary(reg13a)

Call:
lm(formula = Profitmarg ~ HHIRev, data = df_Profitmarg, na.action = na.omit)

Residuals:
      Min        1Q    Median        3Q       Max 
-24578116     -1352      -460      1445     35594 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)     3669       2303   1.593 0.111121    
HHIRev        -44692      11969  -3.734 0.000189 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 209000 on 13853 degrees of freedom
  (4335 observations deleted due to missingness)
Multiple R-squared:  0.001006,  Adjusted R-squared:  0.0009334 
F-statistic: 13.94 on 1 and 13853 DF,  p-value: 0.0001892
# reg13a: the coefficient is negative and R squared is extremely weak

# Same naive specification for the remaining three concentration measures.

# (2) CR4 on revenue -- negative coefficient and weak R squared as well
reg13b <- lm(
  Profitmarg ~ CR4Rev,
  data = df_Profitmarg,
  na.action = na.omit
)

# (3) HHI on market cap -- negative and weak, too
reg13c <- lm(
  Profitmarg ~ HHIMC,
  data = df_Profitmarg,
  na.action = na.omit
)

# (4) CR4 on market cap -- same picture
reg13d <- lm(
  Profitmarg ~ CR4MC,
  data = df_Profitmarg,
  na.action = na.omit
)

# All four naive models side by side
stargazer(
  reg13a, reg13b, reg13c, reg13d,
  title = "Naive Model Profit Margins firm level",
  type = "text"
)

Naive Model Profit Margins firm level
===========================================================================================
                                                    Dependent variable:                    
                                 ----------------------------------------------------------
                                                         Profitmarg                        
                                      (1)            (2)           (3)            (4)      
-------------------------------------------------------------------------------------------
HHIRev                           -44,691.610***                                            
                                  (11,968.610)                                             
                                                                                           
CR4Rev                                          -22,710.000**                              
                                                 (9,141.645)                               
                                                                                           
HHIMC                                                         -46,979.970***               
                                                               (10,824.840)                
                                                                                           
CR4MC                                                                        -28,237.340***
                                                                              (9,535.257)  
                                                                                           
Constant                           3,669.307     9,477.674*     3,547.671     10,485.920** 
                                  (2,302.995)    (4,878.042)   (2,161.850)    (4,515.542)  
                                                                                           
-------------------------------------------------------------------------------------------
Observations                         13,855        13,855         13,855         13,855    
R2                                   0.001         0.0004         0.001          0.001     
Adjusted R2                          0.001         0.0004         0.001          0.001     
Residual Std. Error (df = 13853)  208,950.900    209,009.500   208,914.100    208,989.900  
F Statistic (df = 1; 13853)        13.943***       6.171**      18.836***       8.770***   
===========================================================================================
Note:                                                           *p<0.1; **p<0.05; ***p<0.01
### now we test again for the time fixed effect
### (the naive specification plus Quarter as an additional regressor)

# effect of HHIRev on Profitmarg, controlling for Quarter
reg14a <- lm(Profitmarg ~ HHIRev + Quarter, data = df_Profitmarg, na.action = na.omit)
# Summarise the model fitted in this step (reg14a), not the naive reg13a,
# so the printed summary belongs to the time-fixed specification.
summary(reg14a)

Call:
lm(formula = Profitmarg ~ HHIRev, data = df_Profitmarg, na.action = na.omit)

Residuals:
      Min        1Q    Median        3Q       Max 
-24578116     -1352      -460      1445     35594 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)     3669       2303   1.593 0.111121    
HHIRev        -44692      11969  -3.734 0.000189 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 209000 on 13853 degrees of freedom
  (4335 observations deleted due to missingness)
Multiple R-squared:  0.001006,  Adjusted R-squared:  0.0009334 
F-statistic: 13.94 on 1 and 13853 DF,  p-value: 0.0001892
# Time-fixed specification for the remaining three concentration measures:
# Profitmarg on the measure plus Quarter, missing values dropped by na.omit.

# (2) CR4 on revenue -- negative coefficient and weak R squared
reg14b <- lm(
  Profitmarg ~ CR4Rev + Quarter,
  data = df_Profitmarg,
  na.action = na.omit
)

# (3) HHI on market cap -- negative and weak as well
reg14c <- lm(
  Profitmarg ~ HHIMC + Quarter,
  data = df_Profitmarg,
  na.action = na.omit
)

# (4) CR4 on market cap -- same picture
reg14d <- lm(
  Profitmarg ~ CR4MC + Quarter,
  data = df_Profitmarg,
  na.action = na.omit
)

# All four time-fixed models side by side
stargazer(
  reg14a, reg14b, reg14c, reg14d,
  title = "Time-fixed Model Profit Margins firm level",
  type = "text"
)

Time-fixed Model Profit Margins firm level
===========================================================================================
                                                    Dependent variable:                    
                                 ----------------------------------------------------------
                                                         Profitmarg                        
                                      (1)            (2)           (3)            (4)      
-------------------------------------------------------------------------------------------
HHIRev                           -44,718.890***                                            
                                  (11,968.560)                                             
                                                                                           
CR4Rev                                          -22,595.640**                              
                                                 (9,142.254)                               
                                                                                           
HHIMC                                                         -47,301.970***               
                                                               (10,828.050)                
                                                                                           
CR4MC                                                                        -28,768.040***
                                                                              (9,545.028)  
                                                                                           
Quarter                             -426.638      -412.088       -466.249       -477.446   
                                   (391.275)      (391.412)     (391.323)      (391.750)   
                                                                                           
Constant                           7,534.872*   13,151.340**    7,805.198*    15,039.170** 
                                  (4,227.508)    (5,997.554)   (4,176.391)    (5,860.637)  
                                                                                           
-------------------------------------------------------------------------------------------
Observations                         13,855        13,855         13,855         13,855    
R2                                   0.001          0.001         0.001          0.001     
Adjusted R2                          0.001         0.0004         0.001          0.001     
Residual Std. Error (df = 13852)  208,949.500    209,008.700   208,910.900    208,986.200  
F Statistic (df = 2; 13852)         7.566***       3.640**      10.128***       5.128***   
===========================================================================================
Note:                                                           *p<0.1; **p<0.05; ***p<0.01
### now we test again for the two-way fixed effects model
### (Quarter and GICS_SubInd added as regressors)

# effect of HHIRev on Profitmarg, controlling for Quarter and GICS_SubInd
reg15a <- lm(Profitmarg ~ HHIRev + Quarter + GICS_SubInd, data = df_Profitmarg, na.action = na.omit)
# Summarise the model fitted in this step (reg15a), not the naive reg13a,
# so the printed summary belongs to the two-way specification.
summary(reg15a)

Call:
lm(formula = Profitmarg ~ HHIRev, data = df_Profitmarg, na.action = na.omit)

Residuals:
      Min        1Q    Median        3Q       Max 
-24578116     -1352      -460      1445     35594 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)     3669       2303   1.593 0.111121    
HHIRev        -44692      11969  -3.734 0.000189 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 209000 on 13853 degrees of freedom
  (4335 observations deleted due to missingness)
Multiple R-squared:  0.001006,  Adjusted R-squared:  0.0009334 
F-statistic: 13.94 on 1 and 13853 DF,  p-value: 0.0001892
# Two-way specification for the remaining three concentration measures:
# Profitmarg on the measure plus Quarter and GICS_SubInd, missing values
# dropped by na.omit.

# (2) CR4 on revenue -- in the printed table the coefficient is positive
# and not significant
reg15b <- lm(
  Profitmarg ~ CR4Rev + Quarter + GICS_SubInd,
  data = df_Profitmarg,
  na.action = na.omit
)

# (3) HHI on market cap -- positive and not significant in the printed table
reg15c <- lm(
  Profitmarg ~ HHIMC + Quarter + GICS_SubInd,
  data = df_Profitmarg,
  na.action = na.omit
)

# (4) CR4 on market cap -- negative and not significant in the printed table
reg15d <- lm(
  Profitmarg ~ CR4MC + Quarter + GICS_SubInd,
  data = df_Profitmarg,
  na.action = na.omit
)

# All four two-way models side by side; R2 stays at 0.003 in every column
stargazer(
  reg15a, reg15b, reg15c, reg15d,
  title = "Two-way fixed effects Model Profit Margins firm level",
  type = "text"
)

Two-way fixed effects Model Profit Margins firm level
=============================================================================================
                                                     Dependent variable:                     
                                 ------------------------------------------------------------
                                                          Profitmarg                         
                                       (1)            (2)            (3)            (4)      
---------------------------------------------------------------------------------------------
HHIRev                           278,765.200***                                              
                                  (87,654.730)                                               
                                                                                             
CR4Rev                                             6,526.000                                 
                                                  (55,791.600)                               
                                                                                             
HHIMC                                                             96,763.700                 
                                                                 (60,528.570)                
                                                                                             
CR4MC                                                                           -10,964.380  
                                                                                (40,869.140) 
                                                                                             
Quarter                             -368.690        -409.320       -264.249       -433.626   
                                    (391.703)      (392.830)      (401.520)      (405.176)   
                                                                                             
GICS_SubInd45102010               60,616.850***    3,674.814      56,768.330     -8,405.311  
                                  (23,374.200)    (36,454.940)   (38,125.930)   (33,084.520) 
                                                                                             
GICS_SubInd45102020                -14,807.360      -895.254      23,531.300      -486.372   
                                  (27,854.320)    (28,058.330)   (31,245.720)   (27,500.770) 
                                                                                             
GICS_SubInd45102030              -87,141.380***     -306.370      47,670.850     -4,369.395  
                                  (31,940.970)    (16,556.810)   (34,237.680)   (22,585.050) 
                                                                                             
GICS_SubInd45103010               62,156.470***    3,980.556      55,410.220     -7,195.850  
                                  (23,546.240)    (37,956.660)   (37,145.310)   (29,196.380) 
                                                                                             
GICS_SubInd45103020                30,360.690*     1,585.560      48,445.120     -4,656.484  
                                  (17,892.970)    (21,729.630)   (33,988.960)   (22,323.070) 
                                                                                             
GICS_SubInd45201020               41,638.590**     1,682.935      55,113.340     -7,696.491  
                                  (18,571.880)    (23,133.560)   (37,169.630)   (29,623.490) 
                                                                                             
GICS_SubInd45202030                -1,836.300       559.351       37,407.580     -3,422.752  
                                  (14,684.520)    (16,144.240)   (27,743.060)   (18,907.740) 
                                                                                             
GICS_SubInd45203010               50,238.580**     2,344.318      50,537.200     -6,246.373  
                                  (20,255.880)    (25,115.600)   (34,147.150)   (25,834.530) 
                                                                                             
GICS_SubInd45203015               51,921.320**     2,973.927      55,833.980     -7,239.905  
                                  (20,700.800)    (29,750.090)   (37,252.440)   (29,219.110) 
                                                                                             
GICS_SubInd45203020               -87,662.500**     -682.504      30,837.550     -1,229.966  
                                  (34,277.860)    (20,773.980)   (28,279.830)   (20,802.030) 
                                                                                             
GICS_SubInd45203030                11,030.320       -236.770      28,349.860     -1,863.987  
                                  (19,413.750)    (19,094.400)   (26,161.800)   (20,026.620) 
                                                                                             
GICS_SubInd45301010                35,149.770*     2,039.072      51,921.270     -5,519.396  
                                  (18,728.200)    (22,933.270)   (35,815.820)   (25,607.310) 
                                                                                             
GICS_SubInd45301020               56,408.390**     3,293.565      50,582.750     -5,574.180  
                                  (22,004.750)    (31,625.800)   (34,250.580)   (24,256.760) 
                                                                                             
GICS_SubInd50202010               38,778.630**     1,639.503      49,196.350     -5,260.829  
                                  (19,561.760)    (23,592.840)   (34,579.990)   (23,431.000) 
                                                                                             
GICS_SubInd50203010              -202,347.800*** -67,368.710*** -92,281.040*** -66,520.720***
                                  (45,561.240)    (16,848.830)   (22,656.730)   (16,124.670) 
                                                                                             
Constant                         -67,600.750***    -1,911.130    -57,085.130     14,696.950  
                                  (25,685.430)    (50,951.560)   (40,106.880)   (42,237.800) 
                                                                                             
---------------------------------------------------------------------------------------------
Observations                         13,855          13,855         13,855         13,855    
R2                                    0.003          0.003          0.003          0.003     
Adjusted R2                           0.002          0.001          0.002          0.001     
Residual Std. Error (df = 13837)   208,816.700    208,892.900    208,873.700    208,892.400  
F Statistic (df = 17; 13837)        2.810***        2.215***       2.365***       2.218***   
=============================================================================================
Note:                                                             *p<0.1; **p<0.05; ***p<0.01
# again very low explanatory value

Testing Hypothesis 3

In a preliminary step, the statistical relationship between market concentration and the size of digital markets was examined. Accordingly, firm-level revenue and market capitalization figures were aggregated per quarter and GICS subindustry. In a time-fixed model (i.e., controlling for Quarter), aggregated market capitalization was regressed on the two MC concentration measures and, correspondingly, aggregated revenue on the two revenue-based concentration measures. The results show a clear statistical relationship between the variables: the more concentrated a market, the larger its aggregated revenue and market capitalization. While all concentration measures are highly significant, it is again market capitalization that serves as the better predictor of market size (in particular HHI MC). While this represents merely a naive model, the adjusted R-squared values (HHI Rev 0.066, CR4 Rev 0.031, HHI MC 0.296 (!), CR4 MC 0.045) give a clear indication that market concentration as a competition problem is more pronounced in large markets. The relationship between the size of a market and market concentration represents a good opportunity for further, more in-depth research!

# Testing Hypothesis 3
# Preliminary analysis: Reduced market concentration has a statistical effect
# on the growth of China’s digital economy.

# New data set with aggregate market cap and revenue per sub-industry:
# each of the columns MC_Q1..MC_Q16 and Rev_Q1..Rev_Q16 of df_wide is summed
# within GICS_SubInd (one output column per quarter). The left-hand side of
# the formula is assembled from the generated column names.
# NOTE(review): the formula interface of aggregate() defaults to
# na.action = na.omit, so a firm with NA in any of these 32 columns would be
# left out of every sum -- confirm this is the intended handling of missing
# quarters.
df_grow <- aggregate(
  as.formula(
    paste0(
      "cbind(",
      paste(c(paste0("MC_Q", 1:16), paste0("Rev_Q", 1:16)), collapse = ", "),
      ") ~ GICS_SubInd"
    )
  ),
  data = df_wide,
  FUN = sum
)





# Revenue-based concentration measures first.
# Attach the quarterly HHIRev_SubInd_Q1..16 and CR4Rev_Subind_Q1..16 columns
# of df_wide to the aggregated table, matching on GICS_SubInd and keeping
# every row of df_grow.
df_grow_Rev <- merge(
  df_grow,
  df_wide[, c(
    "GICS_SubInd",
    paste0("HHIRev_SubInd_Q", 1:16),
    paste0("CR4Rev_Subind_Q", 1:16)
  )],
  by = "GICS_SubInd",
  all.x = TRUE
)

# The merge can return several rows per GICS_SubInd; keep only the first one.
df_grow_Rev <- df_grow_Rev[!duplicated(df_grow_Rev$GICS_SubInd), ]


# Transform the wide subindustry table into long format: one row per
# (GICS_SubInd, Quarter) carrying total revenue, HHI (Rev) and CR4 (Rev).
# Each measure is pivoted separately; the quarter number is recovered by
# stripping the column prefix from the former column name.

# Total revenue
df_grow_1 <- df_grow_Rev %>%
  pivot_longer(
    cols = starts_with("Rev_Q"),
    names_to = "Quarter",
    values_to = "Rev"
  ) %>%
  select(GICS_SubInd, Quarter, Rev)

df_grow_1$Quarter <- as.numeric(sub("^Rev_Q", "", df_grow_1$Quarter))

# HHI based on revenue
df_grow_2 <- df_grow_Rev %>%
  pivot_longer(
    cols = starts_with("HHIRev_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIRev"
  ) %>%
  select(GICS_SubInd, Quarter, HHIRev)

df_grow_2$Quarter <- as.numeric(sub("^HHIRev_SubInd_Q", "", df_grow_2$Quarter))

# CR4 based on revenue. The columns are spelled "CR4Rev_Subind_Q*" (lower-case
# "i"); the selector now uses that exact spelling instead of relying on
# starts_with() ignoring case by default.
df_grow_3 <- df_grow_Rev %>%
  pivot_longer(
    cols = starts_with("CR4Rev_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4Rev"
  ) %>%
  select(GICS_SubInd, Quarter, CR4Rev)

df_grow_3$Quarter <- as.numeric(sub("^CR4Rev_Subind_Q", "", df_grow_3$Quarter))

# Combine the three long tables on (Quarter, GICS_SubInd).
dfgrow_Rev_long <- merge(
  merge(df_grow_1, df_grow_2, by = c("Quarter", "GICS_SubInd"), all = TRUE),
  df_grow_3,
  by = c("Quarter", "GICS_SubInd"),
  all = TRUE
)
                    
## Effect of market concentration on total revenue
# Quarter is numeric (converted with as.numeric() when reshaping), so it
# enters each model as a single linear term.

# HHI (Rev) as predictor of total revenue, controlling for Quarter.
reg16a <- lm(
  Rev ~ HHIRev + Quarter,
  data = dfgrow_Rev_long
)
# summary(reg16a)

# The time-fixed regression model (i.e. when controlling for Quarter) shows
# that the influence of HHI (Rev) on total revenue is highly significant:
# the higher the market concentration, the higher the revenue of a market (???)

# CR4 (Rev) as predictor of total revenue, controlling for Quarter.
reg16b <- lm(
  Rev ~ CR4Rev + Quarter,
  data = dfgrow_Rev_long
)
# summary(reg16b)
# CR4 also has a positive coefficient but is not significant.


### Now the same for the market-cap-based concentration measures.

# Attach the subindustry-level HHI (MC) and CR4 (MC) columns of df_wide to the
# aggregated totals. Note the spelling of the source columns:
# "HHIMC_SubInd_Q*" (capital I) versus "CR4MC_Subind_Q*" (lower-case i).
df_grow_MC <- merge(
  df_grow,
  df_wide[, c(
    "GICS_SubInd",
    paste0("HHIMC_SubInd_Q", 1:16),
    paste0("CR4MC_Subind_Q", 1:16)
  )],
  by = "GICS_SubInd",
  all.x = TRUE
)

# The merge can return several rows per subindustry; keep only the first row
# for each GICS_SubInd.
df_grow_MC <- df_grow_MC[!duplicated(df_grow_MC[["GICS_SubInd"]]), ]


# Transform the wide subindustry table into long format: one row per
# (GICS_SubInd, Quarter) carrying total market cap, HHI (MC) and CR4 (MC).
# Each measure is pivoted separately; the quarter number is recovered by
# stripping the column prefix from the former column name.

# Total market cap
df_grow_4 <- df_grow_MC %>%
  pivot_longer(
    cols = starts_with("MC_Q"),
    names_to = "Quarter",
    values_to = "MC"
  ) %>%
  select(GICS_SubInd, Quarter, MC)

df_grow_4$Quarter <- as.numeric(sub("^MC_Q", "", df_grow_4$Quarter))

# HHI based on market cap
df_grow_5 <- df_grow_MC %>%
  pivot_longer(
    cols = starts_with("HHIMC_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIMC"
  ) %>%
  select(GICS_SubInd, Quarter, HHIMC)

df_grow_5$Quarter <- as.numeric(sub("^HHIMC_SubInd_Q", "", df_grow_5$Quarter))

# CR4 based on market cap. The columns are spelled "CR4MC_Subind_Q*"
# (lower-case "i"); the selector now uses that exact spelling instead of
# relying on starts_with() ignoring case by default.
df_grow_6 <- df_grow_MC %>%
  pivot_longer(
    cols = starts_with("CR4MC_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4MC"
  ) %>%
  select(GICS_SubInd, Quarter, CR4MC)

df_grow_6$Quarter <- as.numeric(sub("^CR4MC_Subind_Q", "", df_grow_6$Quarter))

# Merge the three MC long tables on (Quarter, GICS_SubInd).
dfgrow_MC_long <- merge(
  merge(df_grow_4, df_grow_5, by = c("Quarter", "GICS_SubInd"), all = TRUE),
  df_grow_6,
  by = c("Quarter", "GICS_SubInd"),
  all = TRUE
)

# Regress total market cap on the MC-based concentration measures, with
# Quarter as control.

# HHI (MC)
reg16c <- lm(
  MC ~ HHIMC + Quarter,
  data = dfgrow_MC_long
)
# summary(reg16c)

# Same picture: even when controlling for Quarter, the influence of HHI (MC)
# on total market cap is highly significant; the higher the market
# concentration, the higher the MC of a market.

# CR4 (MC)
reg16d <- lm(
  MC ~ CR4MC + Quarter,
  data = dfgrow_MC_long
)
# summary(reg16d)

## This time the CR4 concentration measure is also significant, even highly!
# Conclusion: larger markets are more concentrated.

# Print all four naive models (revenue: 16a/16b, market cap: 16c/16d).
stargazer(
  reg16a, reg16b, reg16c, reg16d,
  title = "Naive Model Market Concentration and Market Size",
  type = "text"
)

Naive Model Market Concentration and Market Size
===============================================================================================================================
                                                                     Dependent variable:                                       
                               ------------------------------------------------------------------------------------------------
                                                    Rev                                              MC                        
                                        (1)                    (2)                      (3)                      (4)           
-------------------------------------------------------------------------------------------------------------------------------
HHIRev                         385,872,303,731.000***                                                                          
                               (121,304,982,623.000)                                                                           
                                                                                                                               
CR4Rev                                                  95,939,270,911.000                                                     
                                                      (105,624,904,965.000)                                                    
                                                                                                                               
HHIMC                                                                        19,122,860,336,600.000***                         
                                                                              (1,870,266,392,250.000)                          
                                                                                                                               
CR4MC                                                                                                  6,638,910,245,635.000***
                                                                                                       (2,043,512,598,127.000) 
                                                                                                                               
Quarter                        18,046,663,164.000***  17,646,246,734.000***   286,920,484,981.000***    233,813,978,575.000**  
                                (5,624,617,637.000)    (5,725,099,833.000)     (96,494,394,910.000)     (112,339,531,872.000)  
                                                                                                                               
Constant                       290,772,698,669.000*** 320,086,731,004.000***    230,519,542,215.000      913,729,570,006.000   
                                (61,672,423,096.000)   (88,980,825,298.000)   (1,044,494,198,920.000)  (1,681,848,350,024.000) 
                                                                                                                               
-------------------------------------------------------------------------------------------------------------------------------
Observations                            256                    256                      256                      256           
R2                                     0.073                  0.039                    0.301                    0.052          
Adjusted R2                            0.066                  0.031                    0.296                    0.045          
Residual Std. Error (df = 253)  414,729,794,806.000    422,254,208,708.000     7,093,076,372,427.000    8,261,598,569,857.000  
F Statistic (df = 2; 253)             9.966***               5.145***                54.565***                 6.968***        
===============================================================================================================================
Note:                                                                                               *p<0.1; **p<0.05; ***p<0.01

Moving on to the main analysis, the effect of the new regulatory approach on growth rates was tested (1) on the aggregated market level and (2) on the firm level. When plotting the calculated growth rates per GICS subindustry, we can observe a general negative trend for market capitalisation growth rates and a cyclical yet stable trajectory for revenue growth rates. In all cases, no abnormal patterns can be observed around the cut-off, with the notable exception of the 45102020 subindustry (Data Processing & Outsourced Services), which drops off dramatically in quarter 9 before quickly rebounding in the market capitalisation graph. A time-fixed model shows no significant treatment effect for either market cap or revenue growth rates. An interaction term for the different market concentration measures was included to test whether a part of the treatment effect was mediated by market concentration, with no significant results.

##### Now we calculate the growth rates

# Quarter-on-quarter growth of total market cap per subindustry:
# (value - previous quarter) / previous quarter. The first quarter of each
# subindustry has no predecessor and therefore gets NA.
# Rows are sorted by quarter explicitly so that lag() always refers to the
# preceding quarter, and the result is ungrouped so the grouping does not
# leak into later steps.
df_growthrateMC <- df_grow_4 %>%
  arrange(GICS_SubInd, Quarter) %>%
  group_by(GICS_SubInd) %>%
  mutate(Growth_MC = (MC - dplyr::lag(MC)) / dplyr::lag(MC)) %>%
  ungroup() %>%
  select(GICS_SubInd, Quarter, Growth_MC)

# Same computation for total revenue per subindustry.
df_growthrateRev <- df_grow_1 %>%
  arrange(GICS_SubInd, Quarter) %>%
  group_by(GICS_SubInd) %>%
  mutate(Growth_Rev = (Rev - dplyr::lag(Rev)) / dplyr::lag(Rev)) %>%
  ungroup() %>%
  select(GICS_SubInd, Quarter, Growth_Rev)

# Add the market cap growth rate to the MC long data set.
dfgrow_MC_long <- merge(
  dfgrow_MC_long, df_growthrateMC,
  by = c("Quarter", "GICS_SubInd"), all = TRUE
)

# Add the revenue growth rate to the Rev long data set.
dfgrow_Rev_long <- merge(
  dfgrow_Rev_long, df_growthrateRev,
  by = c("Quarter", "GICS_SubInd"), all = TRUE
)


##### Plot the growth rates of market cap and revenue

# One line per GICS subindustry; the dotted vertical line marks Quarter 8,
# the last quarter before the treatment dummy (Quarter >= 9) switches on.
ggplot(
  dfgrow_MC_long,
  aes(x = Quarter, y = Growth_MC, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  labs(x = "Quarter", y = "Growthrate Market Cap", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("Market Capitalisation Growthrate per GICS Subindustry") +
  theme_minimal()

### Very confusing, one cannot really detect any patterns.
### Sharp decline in only one GICS subindustry:
### 45102020 --> Data Processing & Outsourced Services

# Same plot for revenue growth (axis label typo "Revenze" corrected).
ggplot(
  dfgrow_Rev_long,
  aes(x = Quarter, y = Growth_Rev, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  labs(x = "Quarter", y = "Growthrate Revenue", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("Revenue Growthrate per GICS Subindustry") +
  theme_minimal()

### First, the concentration measures (controlled for Quarter) are tested as
### predictors of the growth rates.

# The two concentration measures for market cap in a time-fixed model.

# HHI (MC)
reg17a <- lm(
  Growth_MC ~ HHIMC + Quarter,
  data = dfgrow_MC_long
)
# summary(reg17a)
### No significant effect of HHI (MC) on the market cap growth rate when
### controlling for Quarter (same if we remove Quarter).

# CR4 (MC)
reg17b <- lm(
  Growth_MC ~ CR4MC + Quarter,
  data = dfgrow_MC_long
)
# summary(reg17b)
# Same when testing for CR4.

# Now the two concentration measures for revenue.

# HHI (Rev)
reg17c <- lm(
  Growth_Rev ~ HHIRev + Quarter,
  data = dfgrow_Rev_long
)
# summary(reg17c)
### No significant effect.

# CR4 (Rev)
reg17d <- lm(
  Growth_Rev ~ CR4Rev + Quarter,
  data = dfgrow_Rev_long
)
# summary(reg17d)
## Also no significant coefficient.

### At the subindustry market level there is no statistically significant
### effect.

#stargazer(reg17a, reg17b, reg17c, reg17d, title = "Time-fixed Model Market Concentration Profit Margins Aggregate Market Level", type = "text")

# Now the treatment effect of the regulatory approach is tested by including
# a dummy variable.

# Treatment dummy: 1 from Quarter 9 onwards, 0 for Quarters 1-8.
dfgrow_Rev_long$treatment <- as.numeric(dfgrow_Rev_long$Quarter >= 9)
dfgrow_MC_long$treatment <- as.numeric(dfgrow_MC_long$Quarter >= 9)

# Two time fixed effects models to test the treatment effect.

# Revenue growth
reg18a <- lm(
  Growth_Rev ~ Quarter + treatment,
  data = dfgrow_Rev_long
)

# Market cap growth
reg18b <- lm(
  Growth_MC ~ Quarter + treatment,
  data = dfgrow_MC_long
)
# summary(reg18b)
# No significant treatment effect.

# NOTE(review): the table title mentions "Profit Margins" although both
# dependent variables are growth rates -- confirm the intended title.
stargazer(
  reg18a, reg18b,
  title = "Time fixed effects Model Profit Margins Aggregate Market Level",
  type = "text"
)

Time fixed effects Model Profit Margins Aggregate Market Level
===========================================================
                                   Dependent variable:     
                               ----------------------------
                                 Growth_Rev     Growth_MC  
                                    (1)            (2)     
-----------------------------------------------------------
Quarter                            -0.008       -0.016***  
                                  (0.009)        (0.005)   
                                                           
treatment                          0.059         -0.027    
                                  (0.078)        (0.046)   
                                                           
Constant                          0.108**       0.202***   
                                  (0.053)        (0.031)   
                                                           
-----------------------------------------------------------
Observations                        240            240     
R2                                 0.003          0.177    
Adjusted R2                        -0.005         0.170    
Residual Std. Error (df = 237)     0.300          0.178    
F Statistic (df = 2; 237)          0.357        25.416***  
===========================================================
Note:                           *p<0.1; **p<0.05; ***p<0.01
# 2) Including an interaction term between treatment and market concentration
# to test whether the treatment effect depends on market concentration.
# In an R formula `a*b` expands to a + b + a:b, so each model contains the
# concentration measure, Quarter, treatment and the interaction.

# HHI Rev
reg18c <- lm(
  Growth_Rev ~ HHIRev + Quarter + treatment + HHIRev*treatment,
  data = dfgrow_Rev_long
)
# No significant results.

# CR4 Rev
reg18d <- lm(
  Growth_Rev ~ CR4Rev + Quarter + treatment + CR4Rev*treatment,
  data = dfgrow_Rev_long
)
# Not significant.

# HHI MC
reg18e <- lm(
  Growth_MC ~ HHIMC + Quarter + treatment + HHIMC*treatment,
  data = dfgrow_MC_long
)
# No significant treatment effect.

# CR4 MC (a stray duplicated "+" in the formula was removed; the model terms
# are unchanged)
reg18f <- lm(
  Growth_MC ~ CR4MC + Quarter + treatment + CR4MC*treatment,
  data = dfgrow_MC_long
)

# NOTE(review): the table title mentions "Profit Margins" although the
# dependent variables are growth rates -- confirm the intended title.
stargazer(
  reg18c, reg18d, reg18e, reg18f,
  title = "Time fixed effects Model Profit Margins Aggregate Market Level with interaction term",
  type = "text"
)

Time fixed effects Model Profit Margins Aggregate Market Level with interaction term
==================================================================
                                       Dependent variable:        
                               -----------------------------------
                                 Growth_Rev         Growth_MC     
                                 (1)     (2)      (3)       (4)   
------------------------------------------------------------------
HHIRev                         -0.049                             
                               (0.129)                            
                                                                  
CR4Rev                                 -0.049                     
                                       (0.113)                    
                                                                  
HHIMC                                            0.042            
                                                (0.067)           
                                                                  
CR4MC                                                      0.068  
                                                          (0.066) 
                                                                  
Quarter                        -0.007  -0.007  -0.016*** -0.016***
                               (0.009) (0.009)  (0.005)   (0.005) 
                                                                  
treatment                       0.077   0.118   -0.006     0.034  
                               (0.088) (0.127)  (0.051)   (0.073) 
                                                                  
HHIRev:treatment               -0.082                             
                               (0.184)                            
                                                                  
CR4Rev:treatment                       -0.092                     
                                       (0.156)                    
                                                                  
HHIMC:treatment                                 -0.095            
                                                (0.098)           
                                                                  
CR4MC:treatment                                           -0.100  
                                                          (0.091) 
                                                                  
Constant                       0.119*   0.139  0.193***  0.162*** 
                               (0.061) (0.091)  (0.035)   (0.051) 
                                                                  
------------------------------------------------------------------
Observations                     240     240      240       240   
R2                              0.008   0.011    0.180     0.181  
Adjusted R2                    -0.009  -0.006    0.166     0.167  
Residual Std. Error (df = 235)  0.300   0.300    0.178     0.178  
F Statistic (df = 4; 235)       0.465   0.661  12.886*** 13.005***
==================================================================
Note:                                  *p<0.1; **p<0.05; ***p<0.01

For the firm level, the growth rates were calculated on the same basis (again excluding observations with missing revenue or revenue = 0). This time a two-way fixed effects model was employed, controlling for quarter and GICS subindustry. Only the market capitalisation model ascertains a small positive treatment effect, significant at the 5 percent level. When introducing an interaction term for the different market concentration measures, the interaction itself is never significant, but the treatment effect remains significant in the HHI MC model. However, the explanatory value is relatively low at 1.5 percent. For future research, it would be very interesting to build a more accurate model that incorporates all relevant predictors of growth rates identified by the literature. This would allow us to obtain a better understanding of the predictive power of market concentration and the new regulatory approach. That being said, there is no indication of a negative effect of the regulatory approach on firm growth rates, which is an important result in itself.

### The same analysis can be done on the firm level.

# For that, the growth rates of revenue and market cap are needed, so df_wide
# is transformed into long format: one row per (Name, GICS_SubInd, Quarter).
# Each measure is pivoted separately; the quarter number is recovered by
# stripping the column prefix from the former column name.

# Revenue
dflong1 <- df_wide %>%
  pivot_longer(
    cols = starts_with("Rev_Q"),
    names_to = "Quarter",
    values_to = "Rev"
  ) %>%
  select(Name, GICS_SubInd, Quarter, Rev)

dflong1$Quarter <- as.numeric(sub("^Rev_Q", "", dflong1$Quarter))

# HHI based on revenue
dflong2 <- df_wide %>%
  pivot_longer(
    cols = starts_with("HHIRev_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIRev"
  ) %>%
  select(Name, GICS_SubInd, Quarter, HHIRev)

dflong2$Quarter <- as.numeric(sub("^HHIRev_SubInd_Q", "", dflong2$Quarter))

# CR4 based on revenue. The columns are spelled "CR4Rev_Subind_Q*" (lower-case
# "i"); the selector now uses that exact spelling instead of relying on
# starts_with() ignoring case by default.
dflong3 <- df_wide %>%
  pivot_longer(
    cols = starts_with("CR4Rev_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4Rev"
  ) %>%
  select(Name, GICS_SubInd, Quarter, CR4Rev)

dflong3$Quarter <- as.numeric(sub("^CR4Rev_Subind_Q", "", dflong3$Quarter))

# Market cap
dflong4 <- df_wide %>%
  pivot_longer(
    cols = starts_with("MC_Q"),
    names_to = "Quarter",
    values_to = "MC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, MC)

dflong4$Quarter <- as.numeric(sub("^MC_Q", "", dflong4$Quarter))

# HHI based on market cap
dflong5 <- df_wide %>%
  pivot_longer(
    cols = starts_with("HHIMC_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIMC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, HHIMC)

dflong5$Quarter <- as.numeric(sub("^HHIMC_SubInd_Q", "", dflong5$Quarter))

# CR4 based on market cap (columns spelled "CR4MC_Subind_Q*", see above).
dflong6 <- df_wide %>%
  pivot_longer(
    cols = starts_with("CR4MC_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4MC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, CR4MC)

dflong6$Quarter <- as.numeric(sub("^CR4MC_Subind_Q", "", dflong6$Quarter))

# Join the six long data sets on (Name, GICS_SubInd, Quarter).
# NOTE(review): the joins assume Name identifies a firm uniquely within a
# subindustry; duplicated names would multiply rows -- confirm.
dfgrowthrates <- left_join(dflong1, dflong2, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(dflong3, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(dflong4, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(dflong5, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(dflong6, by = c("Name", "GICS_SubInd", "Quarter"))

# Calculate the firm-level growth rates of revenue and market cap:
# (value - previous quarter) / previous quarter within each firm. Rows are
# sorted by firm and quarter first so that lag() refers to the preceding
# quarter; the data is ungrouped afterwards so that the grouping does not
# leak into later steps.
# NOTE(review): firms are identified by Name here; if two firms share a name
# their series would be mixed -- confirm that Name is unique.
dfgrowthrates <- dfgrowthrates %>%
  arrange(Name, Quarter) %>%
  group_by(Name) %>%
  mutate(
    GrowthR_MC = (MC - dplyr::lag(MC)) / dplyr::lag(MC),
    GrowthR_Rev = (Rev - dplyr::lag(Rev)) / dplyr::lag(Rev)
  ) %>%
  ungroup()

# Treatment dummy: 1 from Quarter 9 onwards, 0 before.
dfgrowthrates$treatment <- ifelse(dfgrowthrates$Quarter >= 9, 1, 0)

# Again get rid of the infinite results (previous-quarter value of 0).
# A row is removed when either growth rate is +/-Inf; NA and NaN rows are
# kept here.
# NOTE(review): dropping the whole row also removes a valid growth rate of
# the other measure for that firm-quarter -- confirm this is intended.
dfgrowthrates <- dfgrowthrates[
  !is.infinite(dfgrowthrates$GrowthR_Rev) &
    !is.infinite(dfgrowthrates$GrowthR_MC),
]

### Two-way fixed effects model on the firm level: GICS subindustry and
### Quarter as controls, plus the treatment dummy.

# Revenue growth
reg19a <- lm(
  GrowthR_Rev ~ GICS_SubInd + Quarter + treatment,
  data = dfgrowthrates
)
# summary(reg19a)
#### No significance.

# Market cap growth
reg19b <- lm(
  GrowthR_MC ~ GICS_SubInd + Quarter + treatment,
  data = dfgrowthrates
)
# summary(reg19b)

# NOTE(review): the table title mentions "Profit Margins" although both
# dependent variables are growth rates -- confirm the intended title.
stargazer(
  reg19a, reg19b,
  title = "Two-way fixed effcts Treament Profit Margins Firm Level",
  type = "text"
)

Two-way fixed effcts Treament Profit Margins Firm Level
========================================================================
                                    Dependent variable:                 
                    ----------------------------------------------------
                           GrowthR_Rev                GrowthR_MC        
                               (1)                       (2)            
------------------------------------------------------------------------
GICS_SubInd45102010           0.205                     0.054           
                             (0.509)                   (0.040)          
                                                                        
GICS_SubInd45102020          -0.068                     0.075           
                             (1.053)                   (0.074)          
                                                                        
GICS_SubInd45102030           0.097                    0.194***         
                             (0.630)                   (0.053)          
                                                                        
GICS_SubInd45103010           0.181                    0.087**          
                             (0.495)                   (0.039)          
                                                                        
GICS_SubInd45103020           0.433                     0.061           
                             (0.575)                   (0.046)          
                                                                        
GICS_SubInd45201020           0.276                    0.084**          
                             (0.493)                   (0.039)          
                                                                        
GICS_SubInd45202030           0.119                     0.053           
                             (0.559)                   (0.044)          
                                                                        
GICS_SubInd45203010           0.208                     0.068*          
                             (0.478)                   (0.038)          
                                                                        
GICS_SubInd45203015          -0.043                    0.090**          
                             (0.480)                   (0.038)          
                                                                        
GICS_SubInd45203020          -0.066                    0.151***         
                             (0.784)                   (0.058)          
                                                                        
GICS_SubInd45203030         4.124***                    0.068           
                             (0.727)                   (0.061)          
                                                                        
GICS_SubInd45301010           0.094                    0.190***         
                             (0.578)                   (0.048)          
                                                                        
GICS_SubInd45301020           0.350                    0.138***         
                             (0.494)                   (0.039)          
                                                                        
GICS_SubInd50202010          1.240**                   0.093**          
                             (0.576)                   (0.044)          
                                                                        
GICS_SubInd50203010          -0.054                     0.040           
                             (0.610)                   (0.045)          
                                                                        
Quarter                      -0.025                   -0.022***         
                             (0.031)                   (0.003)          
                                                                        
treatment                     0.171                    0.048**          
                             (0.271)                   (0.023)          
                                                                        
Constant                      0.274                    0.140***         
                             (0.487)                   (0.038)          
                                                                        
------------------------------------------------------------------------
Observations                 12,916                     13,027          
R2                            0.005                     0.015           
Adjusted R2                   0.003                     0.014           
Residual Std. Error    7.733 (df = 12898)         0.660 (df = 13009)    
F Statistic         3.508*** (df = 17; 12898) 11.802*** (df = 17; 13009)
========================================================================
Note:                                        *p<0.1; **p<0.05; ***p<0.01
### We get a positive treatment effect for MC.

# Test for an interaction effect with market concentration, to see whether
# the treatment effect depends on the level of market concentration.
# In an R formula `a*b` expands to a + b + a:b.

# HHI Rev
reg19c <- lm(
  GrowthR_Rev ~ HHIRev + GICS_SubInd + Quarter + treatment + HHIRev*treatment,
  data = dfgrowthrates
)
# summary(reg19c)

# CR4 Rev
reg19d <- lm(
  GrowthR_Rev ~ CR4Rev + GICS_SubInd + Quarter + treatment + CR4Rev*treatment,
  data = dfgrowthrates
)
# summary(reg19d)

# HHI MC
reg19e <- lm(
  GrowthR_MC ~ HHIMC + GICS_SubInd + Quarter + treatment + HHIMC*treatment,
  data = dfgrowthrates
)
# summary(reg19e)

# CR4 MC
reg19f <- lm(
  GrowthR_MC ~ CR4MC + GICS_SubInd + Quarter + treatment + CR4MC*treatment,
  data = dfgrowthrates
)
# summary(reg19f)

# NOTE(review): the table title mentions "Profit Margins" although the
# dependent variables are growth rates -- confirm the intended title.
stargazer(
  reg19c, reg19d, reg19e, reg19f,
  title = "Two-way fixed effcts with Interaction Term Profit Margins Firm Level",
  type = "text"
)

Two-way fixed effcts with Interaction Term Profit Margins Firm Level
=============================================================================================================================
                                                               Dependent variable:                                           
                    ---------------------------------------------------------------------------------------------------------
                                        GrowthR_Rev                                          GrowthR_MC                      
                               (1)                       (2)                       (3)                        (4)            
-----------------------------------------------------------------------------------------------------------------------------
HHIRev                       -3.965                                                                                          
                             (3.604)                                                                                         
                                                                                                                             
CR4Rev                                                 -0.560                                                                
                                                       (2.251)                                                               
                                                                                                                             
HHIMC                                                                            -0.330*                                     
                                                                                 (0.195)                                     
                                                                                                                             
CR4MC                                                                                                        -0.174          
                                                                                                            (0.152)          
                                                                                                                             
GICS_SubInd45102010          -0.644                     0.255                     -0.142                     -0.073          
                             (0.963)                   (1.468)                   (0.130)                    (0.119)          
                                                                                                                             
GICS_SubInd45102020           0.140                    -0.047                     -0.016                     0.070           
                             (1.071)                   (1.077)                   (0.094)                    (0.074)          
                                                                                                                             
GICS_SubInd45102030           1.289                     0.126                     0.028                      0.127           
                             (1.302)                   (0.631)                   (0.118)                    (0.080)          
                                                                                                                             
GICS_SubInd45103010          -0.689                     0.244                     -0.104                     -0.022          
                             (0.974)                   (1.534)                   (0.127)                    (0.104)          
                                                                                                                             
GICS_SubInd45103020          -0.010                     0.493                     -0.108                     -0.009          
                             (0.721)                   (0.874)                   (0.117)                    (0.078)          
                                                                                                                             
GICS_SubInd45201020          -0.307                     0.307                     -0.107                     -0.027          
                             (0.748)                   (0.915)                   (0.127)                    (0.106)          
                                                                                                                             
GICS_SubInd45202030           0.130                     0.152                     -0.076                     0.003           
                             (0.559)                   (0.627)                   (0.093)                    (0.063)          
                                                                                                                             
GICS_SubInd45203010          -0.498                     0.263                     -0.107                     -0.027          
                             (0.833)                   (1.011)                   (0.117)                    (0.092)          
                                                                                                                             
GICS_SubInd45203015          -0.767                    -0.005                     -0.102                     -0.020          
                             (0.846)                   (1.189)                   (0.127)                    (0.105)          
                                                                                                                             
GICS_SubInd45203020           1.130                    -0.040                     0.040                     0.136**          
                             (1.375)                   (0.795)                   (0.092)                    (0.060)          
                                                                                                                             
GICS_SubInd45203030         3.969***                  4.143***                    -0.031                     0.043           
                             (0.743)                   (0.727)                   (0.088)                    (0.065)          
                                                                                                                             
GICS_SubInd45301010          -0.403                     0.135                     0.011                      0.102           
                             (0.753)                   (0.918)                   (0.123)                    (0.092)          
                                                                                                                             
GICS_SubInd45301020          -0.438                     0.402                     -0.036                     0.052           
                             (0.907)                   (1.275)                   (0.117)                    (0.086)          
                                                                                                                             
GICS_SubInd50202010           0.698                     1.281                     -0.077                     0.019           
                             (0.778)                   (0.928)                   (0.116)                    (0.079)          
                                                                                                                             
GICS_SubInd50203010           1.811                    -0.051                     0.125*                     0.044           
                             (1.882)                   (0.642)                   (0.069)                    (0.045)          
                                                                                                                             
Quarter                      -0.025                    -0.026                   -0.023***                  -0.023***         
                             (0.031)                   (0.032)                   (0.003)                    (0.003)          
                                                                                                                             
treatment                     0.153                    -0.353                    0.054**                     0.057           
                             (0.294)                   (0.444)                   (0.026)                    (0.039)          
                                                                                                                             
HHIRev:treatment              0.137                                                                                          
                             (0.969)                                                                                         
                                                                                                                             
CR4Rev:treatment                                        1.064                                                                
                                                       (0.718)                                                               
                                                                                                                             
HHIMC:treatment                                                                   0.001                                      
                                                                                 (0.066)                                     
                                                                                                                             
CR4MC:treatment                                                                                              0.006           
                                                                                                            (0.059)          
                                                                                                                             
Constant                      1.274                     0.512                    0.352**                    0.314**          
                             (1.054)                   (2.040)                   (0.138)                    (0.158)          
                                                                                                                             
-----------------------------------------------------------------------------------------------------------------------------
Observations                 12,916                    12,916                     13,027                     13,027          
R2                            0.005                     0.005                     0.015                      0.015           
Adjusted R2                   0.003                     0.003                     0.014                      0.014           
Residual Std. Error    7.733 (df = 12896)        7.733 (df = 12896)         0.660 (df = 13007)         0.660 (df = 13007)    
F Statistic         3.206*** (df = 19; 12896) 3.255*** (df = 19; 12896) 10.719*** (df = 19; 13007) 10.628*** (df = 19; 13007)
=============================================================================================================================
Note:                                                                                             *p<0.1; **p<0.05; ***p<0.01

Testing Hypothesis 4

### firms entering/leaving the market is approximated by difference in listed firms
#working with NAs here

### Overview: missing market-cap values per quarter

# columns 4 to 19 of df_wide (the printed counts list them as MC_Q1 ... MC_Q16)
df_na <- select(df_wide, 4:19)

# number of NA entries in each of those columns
na_counts <- colSums(is.na(df_na))
na_counts
 MC_Q1  MC_Q2  MC_Q3  MC_Q4  MC_Q5  MC_Q6  MC_Q7  MC_Q8  MC_Q9 MC_Q10 MC_Q11 
   445    434    423    396    365    347    321    264    239    206    177 
MC_Q12 MC_Q13 MC_Q14 MC_Q15 MC_Q16 
   158    131    105     79     32 
# Working data frame for this section: the columns are picked by position
# in df_wide.
df_NA <- df_wide %>% select(2:19, 84:115)

# Convert the quarterly market-cap columns to long format
# (one row per firm and quarter).
df_NA_long1 <- df_NA %>%
  pivot_longer(
    cols = starts_with("MC_Q"),
    names_to = "Quarter",
    values_to = "MC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, MC)

# turn the column label into the quarter number, e.g. "MC_Q7" -> 7
df_NA_long1$Quarter <- as.numeric(gsub("MC_Q", "", df_NA_long1$Quarter))


# Number of firms with a missing market cap per sub-industry and quarter.
# .groups = "drop" returns an ungrouped table; without it the result stays
# grouped by GICS_SubInd and that grouping leaks into every later step.
df_NA_long1 <- df_NA_long1 %>%
  group_by(GICS_SubInd, Quarter) %>%
  summarize(Missing_MC = sum(is.na(MC)), .groups = "drop")

# Long table of the market-cap HHI per sub-industry and quarter.
# Only the first row of every sub-industry is kept before reshaping.
# presumably the sub-industry level columns hold the same value for every
# firm of a sub-industry -- TODO confirm

df_NA_long2 <- df_NA[!duplicated(df_NA[, c("GICS_SubInd")]), ]

df_NA_long2 <- df_NA_long2 %>%
  pivot_longer(
    cols = starts_with("HHIMC_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIMC"
  ) %>%
  select(GICS_SubInd, Quarter, HHIMC)

# starts_with() ignores case by default, so the prefix is stripped
# case-insensitively as well; otherwise a column whose capitalisation differs
# from the pattern would be selected above but turned into NA here.
df_NA_long2$Quarter <- as.numeric(
  gsub("HHIMC_SubInd_Q", "", df_NA_long2$Quarter, ignore.case = TRUE)
)

# Same reshaping for the market-cap CR4.

df_NA_long3 <- df_NA[!duplicated(df_NA[, c("GICS_SubInd")]), ]

df_NA_long3 <- df_NA_long3 %>%
  pivot_longer(
    cols = starts_with("CR4MC_SubInd_Q"),
    names_to = "Quarter",
    values_to = "CR4MC"
  ) %>%
  select(GICS_SubInd, Quarter, CR4MC)

# The selector and the strip pattern used two different spellings
# ("SubInd" vs "Subind"); matching without regard to case makes the
# conversion work whichever spelling the columns actually have.
df_NA_long3$Quarter <- as.numeric(
  gsub("CR4MC_SubInd_Q", "", df_NA_long3$Quarter, ignore.case = TRUE)
)


### Combine the three long tables

# Full outer joins on quarter and sub-industry:
# missing-value counts, then HHI, then CR4.
df_NA_long <- merge(
  df_NA_long1, df_NA_long2,
  by = c("Quarter", "GICS_SubInd"), all = TRUE
)
df_NA_long <- merge(
  df_NA_long, df_NA_long3,
  by = c("Quarter", "GICS_SubInd"), all = TRUE
)

# Entry proxy: the quarter-on-quarter drop in the number of firms without a
# market cap, computed within each sub-industry. Rows are sorted by quarter
# first so that lag() refers to the preceding quarter; the first quarter of
# every sub-industry has no predecessor and therefore gets NA.
df_NA_long <- df_NA_long %>%
  arrange(Quarter) %>%
  group_by(GICS_SubInd) %>%
  mutate(NewFirms = -1 * (Missing_MC - lag(Missing_MC))) %>%
  ungroup()

# One line per sub-industry; the dotted vertical line marks quarter 8
ggplot(
  df_NA_long,
  aes(x = Quarter, y = NewFirms, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(
    title = "Firms entering the Market",
    x = "Quarter",
    y = "New Firms",
    color = "GICS Subindustry"
  ) +
  theme_minimal()

#no real patterns detectable but there are some dropoffs around the cutoff


### Regressions: market concentration and the number of new firms

# Bivariate models, one per concentration measure
reg20a <- lm(
  formula = NewFirms ~ HHIMC,
  data = df_NA_long
)
# summary(reg20a)

reg20b <- lm(
  formula = NewFirms ~ CR4MC,
  data = df_NA_long
)
# summary(reg20b)

# stargazer(reg20a, reg20b, title="Naive Regression Market Concentration New Firms", type = "text")
# Reading of the bivariate models: higher HHI / CR4 goes together with fewer
# new (listed) firms entering the market
# --> highly significant effect
# --> adjusted R-squared of about 7 percent for the HHI model

# Same models with Quarter added; Quarter enters as a single numeric
# regressor, i.e. as a linear time trend
reg20c <- lm(
  formula = NewFirms ~ HHIMC + Quarter,
  data = df_NA_long
)
# summary(reg20c)

reg20d <- lm(
  formula = NewFirms ~ CR4MC + Quarter,
  data = df_NA_long
)
# summary(reg20d)

stargazer(
  reg20a, reg20b, reg20c, reg20d,
  title = "Relationship Market Concentration New Firms",
  type = "text"
)

Relationship Market Concentration New Firms
===================================================================================================================
                                                          Dependent variable:                                      
                    -----------------------------------------------------------------------------------------------
                                                               NewFirms                                            
                              (1)                     (2)                     (3)                     (4)          
-------------------------------------------------------------------------------------------------------------------
HHIMC                      -2.717***                                       -2.637***                               
                            (0.617)                                         (0.617)                                
                                                                                                                   
CR4MC                                              -3.645***                                       -3.578***       
                                                    (0.552)                                         (0.552)        
                                                                                                                   
Quarter                                                                     0.057*                   0.053         
                                                                            (0.034)                 (0.032)        
                                                                                                                   
Constant                   2.307***                3.880***                1.773***                3.367***        
                            (0.198)                 (0.355)                 (0.370)                 (0.474)        
                                                                                                                   
-------------------------------------------------------------------------------------------------------------------
Observations                  240                     240                     240                     240          
R2                           0.075                   0.155                   0.086                   0.164         
Adjusted R2                  0.071                   0.151                   0.079                   0.157         
Residual Std. Error    2.258 (df = 238)        2.159 (df = 238)        2.249 (df = 237)        2.151 (df = 237)    
F Statistic         19.366*** (df = 1; 238) 43.602*** (df = 1; 238) 11.214*** (df = 2; 237) 23.285*** (df = 2; 237)
===================================================================================================================
Note:                                                                                   *p<0.1; **p<0.05; ***p<0.01
# Treatment effect with a time trend.
# treatment is 1 from quarter 9 onwards and 0 for quarters 1 to 8.
df_NA_long$treatment <- as.numeric(df_NA_long$Quarter >= 9)

# Baseline: time trend and treatment only
reg21a <- lm(
  formula = NewFirms ~ Quarter + treatment,
  data = df_NA_long
)
# summary(reg21a)

# NOTE(review): reg21b has exactly the same specification as reg21a, so
# columns (1) and (2) of the table are duplicates -- confirm what was intended.
reg21b <- lm(
  formula = NewFirms ~ Quarter + treatment,
  data = df_NA_long
)

# Market concentration added as a control: HHI ...
reg21c <- lm(
  formula = NewFirms ~ HHIMC + Quarter + treatment,
  data = df_NA_long
)
# summary(reg21c)

# ... and CR4
reg21d <- lm(
  formula = NewFirms ~ CR4MC + Quarter + treatment,
  data = df_NA_long
)
# summary(reg21d)

stargazer(
  reg21a, reg21b, reg21c, reg21d,
  title = "Time-fixed Regression Model with Concentration as Control",
  type = "text"
)

Time-fixed Regression Model with Concentration as Control
==============================================================================================================
                                                       Dependent variable:                                    
                    ------------------------------------------------------------------------------------------
                                                             NewFirms                                         
                             (1)                   (2)                   (3)                     (4)          
--------------------------------------------------------------------------------------------------------------
HHIMC                                                                 -2.627***                               
                                                                       (0.612)                                
                                                                                                              
CR4MC                                                                                         -3.531***       
                                                                                               (0.549)        
                                                                                                              
Quarter                   0.195***              0.195***               0.182***                0.164**        
                           (0.069)               (0.069)               (0.067)                 (0.064)        
                                                                                                              
treatment                 -1.263**              -1.263**               -1.244**               -1.113**        
                           (0.598)               (0.598)               (0.578)                 (0.554)        
                                                                                                              
Constant                    0.643                 0.643                1.315***               2.929***        
                           (0.409)               (0.409)               (0.424)                 (0.519)        
                                                                                                              
--------------------------------------------------------------------------------------------------------------
Observations                 240                   240                   240                     240          
R2                          0.034                 0.034                 0.104                   0.178         
Adjusted R2                 0.026                 0.026                 0.093                   0.168         
Residual Std. Error   2.313 (df = 237)      2.313 (df = 237)       2.232 (df = 236)       2.138 (df = 236)    
F Statistic         4.182** (df = 2; 237) 4.182** (df = 2; 237) 9.138*** (df = 3; 236) 17.069*** (df = 3; 236)
==============================================================================================================
Note:                                                                              *p<0.1; **p<0.05; ***p<0.01

In this section we do some robustness checks for our two models. In conclusion, we can establish that market concentration is a strong predictor of the number of new firms regardless of the model: the higher the market concentration, the fewer firms enter the market. Second, the plots and the different regressions point to a small positive treatment effect. However, the robustness checks show that the regression models are exposed to a high degree of heteroskedasticity. Neither logging the dependent variable nor using a higher polynomial for quarter fixes the issue. When trying to control for this with an interaction term between quarter and treatment, the interaction itself, but not the treatment effect, is significant. Ultimately, the results are inconclusive.

# Drop quarter 1: NewFirms is a difference to the previous quarter, so it is
# NA for every sub-industry in the first quarter.

df_NA_long <- df_NA_long %>%
  filter(Quarter != 1)


# Add the fitted values of the two concentration models to the data frame.
# reg21c is the HHI model and reg21d the CR4 model; reg21a and reg21b, which
# were used here before, are identical to each other and contain no
# concentration measure. Passing newdata ties every prediction to its own row
# instead of relying on the row order of the data the model was fitted on.
df_NA_long$predictedHHI <- predict(reg21c, newdata = df_NA_long)
df_NA_long$predictedCR4 <- predict(reg21d, newdata = df_NA_long)


# Actual vs. fitted values across quarters, HHI MC model
ggplot(data = na.omit(df_NA_long), aes(x = Quarter, y = NewFirms, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predictedHHI, color = "PredictedHHI")) +
  scale_color_manual(values = c("Actual" = "black", "PredictedHHI" = "red")) +
  labs(x = "Quarter", y = "NewFirms", title = "Regression Model Performance HHI MC")

# Actual vs. fitted values across quarters, CR4 MC model
ggplot(data = na.omit(df_NA_long), aes(x = Quarter, y = NewFirms, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predictedCR4, color = "PredictedCR4")) +
  scale_color_manual(values = c("Actual" = "black", "PredictedCR4" = "red")) +
  labs(x = "Quarter", y = "NewFirms", title = "Regression Model Performance CR4 MC")

## that doesn't look too good

#### testing for Linearity

# HHI MC model
# Residuals vs. fitted values. reg21c is the model that contains HHIMC;
# reg21a, plotted here before, has no concentration regressor.
plot(reg21c, 1)
# Observation recorded for this diagnostic: the spread of the residuals
# increases as the predicted values increase, indicating that the variance of
# the residuals is not constant across the range of the data.
# NOTE(review): this was noted when the plot was drawn for reg21a -- re-check.

# CR4 MC model
# Residuals vs. fitted values for the model that contains CR4MC
plot(reg21d, 1)

# same problem applies


# testing for homoscedasticity

# HHI MC
# Breusch-Pagan test on the HHI model (reg21c). It used to be run on reg21a,
# which contains no concentration measure and is identical to reg21b.
# NOTE: the output printed below ("data:  reg21a") predates this change and
# has to be regenerated.
bp_HHI <- bptest(reg21c)
bp_HHI 

    studentized Breusch-Pagan test

data:  reg21a
BP = 10.348, df = 2, p-value = 0.005662
#based on the results, there is evidence of heteroscedasticity in the regression model

# CR4 MC
# Breusch-Pagan test on the CR4 model (reg21d). It used to be run on reg21b,
# a copy of reg21a without any concentration measure, which is why both
# printed tests show exactly the same statistic.
# NOTE: the output printed below ("data:  reg21b") predates this change and
# has to be regenerated.
bp_CR4 <- bptest(reg21d)
bp_CR4 

    studentized Breusch-Pagan test

data:  reg21b
BP = 10.348, df = 2, p-value = 0.005662
## same here


### let's test different approaches to deal with heteroscedasticity

# First attempt: let Quarter enter quadratically instead of only linearly.
# Inside a model formula `^` is the crossing operator, so `Quarter^2` is the
# very same term as `Quarter` and no squared regressor was fitted. The square
# has to be wrapped in I() to be evaluated arithmetically.
r1HHI <- lm(NewFirms ~ HHIMC + Quarter + I(Quarter^2) + treatment, data = df_NA_long)
r1CR4 <- lm(NewFirms ~ CR4MC + Quarter + I(Quarter^2) + treatment, data = df_NA_long)

# now redo the test
# NOTE: the BP outputs printed below for r1HHI and r1CR4 (df = 3) stem from
# the former specification without the squared term; regenerate them.
bp_HHI_robust <- bptest(r1HHI)
bp_HHI_robust 

    studentized Breusch-Pagan test

data:  r1HHI
BP = 13.508, df = 3, p-value = 0.003657
#has not changed anything

# Breusch-Pagan test for the CR4 variant (r1CR4); the bare name on the second
# line prints the stored test result.
bp_CR4_robust <- bptest(r1CR4)
bp_CR4_robust 

    studentized Breusch-Pagan test

data:  r1CR4
BP = 23.112, df = 3, p-value = 3.827e-05
#doesn't work

### we can't log the dependent variable because there are some negative values

# Another option is to estimate different slopes for the treatment levels by
# adding an interaction term between the treatment variable and the Quarter
# variable. This allows the slope of Quarter to differ between treatment = 0
# and treatment = 1.
# Quarter*treatment expands to Quarter + treatment + Quarter:treatment; the two
# main effects are already listed, so it only adds the Quarter:treatment term.

r2HHI <-  lm(NewFirms ~ HHIMC + Quarter + treatment + Quarter*treatment, data = df_NA_long)
r2CR4 <-  lm(NewFirms ~ CR4MC + Quarter + treatment + Quarter*treatment, data = df_NA_long)

# coefficient table of the HHI interaction model
summary(r2HHI)

Call:
lm(formula = NewFirms ~ HHIMC + Quarter + treatment + Quarter * 
    treatment, data = df_NA_long)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.1186 -1.4453 -0.4713  0.7282 10.5423 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)         0.5012     0.5844   0.858  0.39196    
HHIMC              -2.6374     0.6081  -4.338 2.14e-05 ***
Quarter             0.3451     0.1048   3.292  0.00115 ** 
treatment           0.9314     1.2250   0.760  0.44783    
Quarter:treatment  -0.2720     0.1353  -2.010  0.04553 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.218 on 235 degrees of freedom
Multiple R-squared:  0.1192,    Adjusted R-squared:  0.1042 
F-statistic: 7.952 on 4 and 235 DF,  p-value: 4.991e-06
# coefficient table of the CR4 interaction model
summary(r2CR4)

Call:
lm(formula = NewFirms ~ CR4MC + Quarter + treatment + Quarter * 
    treatment, data = df_NA_long)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.7361 -1.3411 -0.4142  0.6915 10.2725 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.0437     0.6333   3.227  0.00143 ** 
CR4MC              -3.5946     0.5439  -6.608 2.58e-10 ***
Quarter             0.3488     0.1000   3.488  0.00058 ***
treatment           1.3606     1.1713   1.162  0.24659    
Quarter:treatment  -0.3088     0.1293  -2.389  0.01766 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.117 on 235 degrees of freedom
Multiple R-squared:  0.1978,    Adjusted R-squared:  0.1841 
F-statistic: 14.48 on 4 and 235 DF,  p-value: 1.376e-10
#in both cases, the treatment effect is no longer significant. Based on the p-value for the treatment variable, it does not have a significant effect on the number of NewFirms. However, the interaction term "Quarter:treatment" is significant, which means that the effect of treatment on NewFirms depends on the quarter. Therefore, it is important to examine the coefficients for the treatment variable for each quarter separately to determine if there is a significant effect.

# first let's see if there is still evidence of heteroscedasticity in the
# regression model

# fitted values of the HHI model with the Quarter:treatment interaction
df_NA_long$predictedHHInew <- predict(r2HHI)

# Actual vs. fitted values across quarters for the new HHI MC model.
# The fitted line has to use predictedHHInew; it previously drew predictedHHI,
# i.e. the fitted values of the earlier model without the interaction.
ggplot(data = na.omit(df_NA_long), aes(x = Quarter, y = NewFirms, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predictedHHInew, color = "PredictedHHInew")) +
  scale_color_manual(values = c("Actual" = "black", "PredictedHHInew" = "red")) +
  labs(x = "Quarter", y = "NewFirms", title = " HHI MC Model With interaction effect")

# fitted values of the CR4 model with the Quarter:treatment interaction
df_NA_long$predictedCR4new <- predict(r2CR4)

# Actual vs. fitted values across quarters for the new CR4 MC model.
# The fitted line has to use predictedCR4new; it previously drew predictedHHI,
# so the CR4 plot showed the fitted values of a different model.
ggplot(data = na.omit(df_NA_long), aes(x = Quarter, y = NewFirms, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predictedCR4new, color = "PredictedCR4new")) +
  scale_color_manual(values = c("Actual" = "black", "PredictedCR4new" = "red")) +
  labs(x = "Quarter", y = "NewFirms", title = " CR4 MC Model With interaction effect")

# Breusch-Pagan test for the HHI interaction model. The test was run on r1HHI
# by mistake, which merely repeated the earlier result.
# NOTE: the output printed below ("data:  r1HHI") predates this change and
# has to be regenerated.
bp_r2HHI <- bptest(r2HHI)
bp_r2HHI

    studentized Breusch-Pagan test

data:  r1HHI
BP = 13.508, df = 3, p-value = 0.003657
#has not changed anything

# Breusch-Pagan test for the CR4 interaction model (r2CR4); the bare name on
# the second line prints the stored test result.
bp_r2CR4 <- bptest(r2CR4)
bp_r2CR4 

    studentized Breusch-Pagan test

data:  r2CR4
BP = 21.451, df = 4, p-value = 0.0002577
#same here

# Last try: recoding the Quarter variable as character so we get an
# interaction effect for each quarter

# turning Quarter into a character variable ("Q2", "Q3", ...)
df_NA_long$Quarter_char <- paste0("Q", df_NA_long$Quarter)

# New regression with an interaction term between the character quarter
# variable and treatment. lm() turns Quarter_char into one dummy per quarter;
# Q10 does not appear among the coefficients printed below, so it is the
# reference level.
# treatment is itself computed from Quarter (1 from quarter 9 onwards), so it
# is an exact linear combination of these quarter dummies; the summary below
# accordingly reports treatment and all Quarter_char:treatment terms as
# "not defined because of singularities".

r3HHI <-  lm(NewFirms ~ HHIMC + Quarter_char + treatment + Quarter_char*treatment, data = df_NA_long)
summary(r3HHI)

Call:
lm(formula = NewFirms ~ HHIMC + Quarter_char + treatment + Quarter_char * 
    treatment, data = df_NA_long)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.4946 -1.4450 -0.4942  0.8015  9.7198 

Coefficients: (15 not defined because of singularities)
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)                2.61529    0.56696   4.613 6.68e-06 ***
HHIMC                     -2.63380    0.60602  -4.346 2.10e-05 ***
Quarter_charQ11           -0.25682    0.78136  -0.329   0.7427    
Quarter_charQ12           -0.87393    0.78136  -1.118   0.2646    
Quarter_charQ13           -0.39976    0.78138  -0.512   0.6094    
Quarter_charQ14           -0.49201    0.78146  -0.630   0.5296    
Quarter_charQ15           -0.49192    0.78146  -0.629   0.5297    
Quarter_charQ16            0.80910    0.78151   1.035   0.3016    
Quarter_charQ2            -1.27138    0.78172  -1.626   0.1053    
Quarter_charQ3            -1.30202    0.78154  -1.666   0.0971 .  
Quarter_charQ4            -0.32089    0.78146  -0.411   0.6817    
Quarter_charQ5            -0.07797    0.78144  -0.100   0.9206    
Quarter_charQ6            -0.90025    0.78141  -1.152   0.2505    
Quarter_charQ7            -0.38791    0.78144  -0.496   0.6201    
Quarter_charQ8             1.53301    0.78140   1.962   0.0510 .  
Quarter_charQ9            -0.45272    0.78144  -0.579   0.5629    
treatment                       NA         NA      NA       NA    
Quarter_charQ11:treatment       NA         NA      NA       NA    
Quarter_charQ12:treatment       NA         NA      NA       NA    
Quarter_charQ13:treatment       NA         NA      NA       NA    
Quarter_charQ14:treatment       NA         NA      NA       NA    
Quarter_charQ15:treatment       NA         NA      NA       NA    
Quarter_charQ16:treatment       NA         NA      NA       NA    
Quarter_charQ2:treatment        NA         NA      NA       NA    
Quarter_charQ3:treatment        NA         NA      NA       NA    
Quarter_charQ4:treatment        NA         NA      NA       NA    
Quarter_charQ5:treatment        NA         NA      NA       NA    
Quarter_charQ6:treatment        NA         NA      NA       NA    
Quarter_charQ7:treatment        NA         NA      NA       NA    
Quarter_charQ8:treatment        NA         NA      NA       NA    
Quarter_charQ9:treatment        NA         NA      NA       NA    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.21 on 224 degrees of freedom
Multiple R-squared:  0.1663,    Adjusted R-squared:  0.1105 
F-statistic: 2.979 on 15 and 224 DF,  p-value: 0.000233
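# The NAs arise from perfect collinearity, not merely from the number of interaction terms. When Quarter is converted to a character variable, each unique quarter value becomes its own level and gets its own dummy. Because treatment is a deterministic function of the quarter (1 from quarter 9 onwards, 0 before), treatment and every Quarter_char:treatment term are exact linear combinations of these quarter dummies. The model is therefore overspecified and lm() cannot estimate these coefficients, which it reports as NA.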

# Let's see if the same issue applies to the CR4 MC model

# OLS fit of NewFirms on CR4MC, the quarter dummies (Quarter_char), treatment,
# and the Quarter_char x treatment interaction, using df_NA_long.
# Note: `Quarter_char * treatment` already expands to both main effects plus
# the interaction, so listing the main effects separately is redundant but
# harmless; the terms are kept as written so the model matches the one above.
# NOTE(review): treatment appears to be constant within each quarter, in which
# case it (and every interaction) is absorbed by the quarter dummies -- confirm.
r3CR4 <- lm(
  formula = NewFirms ~ CR4MC + Quarter_char + treatment +
    Quarter_char * treatment,
  data = df_NA_long
)
# Print the coefficient table; aliased (non-estimable) terms show up as NA.
summary(r3CR4)

Call:
lm(formula = NewFirms ~ CR4MC + Quarter_char + treatment + Quarter_char * 
    treatment, data = df_NA_long)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.0477 -1.2228 -0.3506  0.7151  9.4808 

Coefficients: (15 not defined because of singularities)
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)                 4.2699     0.6233   6.850 7.04e-11 ***
CR4MC                      -3.5763     0.5411  -6.609 2.79e-10 ***
Quarter_charQ11            -0.2711     0.7443  -0.364   0.7161    
Quarter_charQ12            -0.9593     0.7444  -1.289   0.1988    
Quarter_charQ13            -0.5163     0.7446  -0.693   0.4888    
Quarter_charQ14            -0.6495     0.7450  -0.872   0.3842    
Quarter_charQ15            -0.6710     0.7451  -0.900   0.3688    
Quarter_charQ16             0.5596     0.7458   0.750   0.4539    
Quarter_charQ2             -1.4000     0.7443  -1.881   0.0613 .  
Quarter_charQ3             -1.3990     0.7443  -1.880   0.0615 .  
Quarter_charQ4             -0.4390     0.7444  -0.590   0.5559    
Quarter_charQ5             -0.1757     0.7443  -0.236   0.8136    
Quarter_charQ6             -0.9818     0.7443  -1.319   0.1885    
Quarter_charQ7             -0.4794     0.7443  -0.644   0.5202    
Quarter_charQ8              1.4240     0.7444   1.913   0.0570 .  
Quarter_charQ9             -0.5016     0.7443  -0.674   0.5011    
treatment                       NA         NA      NA       NA    
Quarter_charQ11:treatment       NA         NA      NA       NA    
Quarter_charQ12:treatment       NA         NA      NA       NA    
Quarter_charQ13:treatment       NA         NA      NA       NA    
Quarter_charQ14:treatment       NA         NA      NA       NA    
Quarter_charQ15:treatment       NA         NA      NA       NA    
Quarter_charQ16:treatment       NA         NA      NA       NA    
Quarter_charQ2:treatment        NA         NA      NA       NA    
Quarter_charQ3:treatment        NA         NA      NA       NA    
Quarter_charQ4:treatment        NA         NA      NA       NA    
Quarter_charQ5:treatment        NA         NA      NA       NA    
Quarter_charQ6:treatment        NA         NA      NA       NA    
Quarter_charQ7:treatment        NA         NA      NA       NA    
Quarter_charQ8:treatment        NA         NA      NA       NA    
Quarter_charQ9:treatment        NA         NA      NA       NA    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.105 on 224 degrees of freedom
Multiple R-squared:  0.2435,    Adjusted R-squared:  0.1928 
F-statistic: 4.807 on 15 and 224 DF,  p-value: 4.585e-08
#same issue

# In conclusion, we can be sure that market concentration is a strong predictor of the number of new firms regardless of the model: the higher the market concentration, the fewer firms enter the market. Second, the plots and the different regressions point to a small positive treatment effect. However, the regression model is exposed to multicollinearity. When trying to control for this with an interaction term, the interaction itself, but not the treatment effect, is significant. Ultimately, the results are inconclusive.